Rethinking DownsamplingLocusIteratorByState with a flattened read structure. Samples are kept
independent while processing, and only merged back in a priority queue if necessary in a special variant of the ReadBackedPileup. This code is not live yet except in the case of naive deduping. Downsampling by sample temporarily disabled, and the ReadBackedPileup variant is sketchy and not well integrated with StratifiedAlignmentContext or the walkers. Cleanup to follow. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3540 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
804facb0cc
commit
c3b68cc58d
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -80,7 +81,7 @@ public class AlignmentContext {
|
|||
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
|
||||
|
||||
this.loc = loc;
|
||||
this.basePileup = new ReadBackedPileup(loc, reads, offsets);
|
||||
this.basePileup = new UnifiedReadBackedPileup(loc, reads, offsets);
|
||||
this.skippedBases = skippedBases;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -90,9 +90,9 @@ public class StratifiedAlignmentContext {
|
|||
int index = type.ordinal();
|
||||
if ( contexts[index] == null ) {
|
||||
if ( isExtended ) {
|
||||
contexts[index] = new AlignmentContext(loc , new ReadBackedExtendedEventPileup(loc, (ArrayList<ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)getPileupElements(type))));
|
||||
contexts[index] = new AlignmentContext(loc , new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList<ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)getPileupElements(type))));
|
||||
} else {
|
||||
contexts[index] = new AlignmentContext(loc, new ReadBackedPileup(loc, getPileupElements(type)));
|
||||
contexts[index] = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, getPileupElements(type)));
|
||||
}
|
||||
}
|
||||
return contexts[index];
|
||||
|
|
@ -300,7 +300,7 @@ public class StratifiedAlignmentContext {
|
|||
|
||||
// dirty trick below. generics do not allow to cast pe (ArrayList<PileupElement>) directly to ArrayList<ExtendedEventPileupElement>,
|
||||
// so we first cast to "? extends" wildcard, then to what we actually need.
|
||||
if ( isExtended ) return new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)pe)) );
|
||||
else return new AlignmentContext(loc, new ReadBackedPileup(loc,pe));
|
||||
if ( isExtended ) return new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)pe)) );
|
||||
else return new AlignmentContext(loc, new UnifiedReadBackedPileup(loc,pe));
|
||||
}
|
||||
}
|
||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.collections.MergingIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -136,7 +137,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
// calculate the number of skipped bases, and update lastLoc so we can do that again in the next()
|
||||
long skippedBases = getSkippedBases( rodSite );
|
||||
lastLoc = site;
|
||||
return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases);
|
||||
return new AlignmentContext(site, new UnifiedReadBackedPileup(site), skippedBases);
|
||||
}
|
||||
|
||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||
|
|
|
|||
|
|
@ -34,10 +34,7 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
|||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -308,11 +305,15 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public void printState() {
|
||||
for ( SAMRecordState state : readStates ) {
|
||||
logger.debug(String.format("printState():"));
|
||||
SAMRecord read = state.getRead();
|
||||
int offset = state.getReadOffset();
|
||||
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
|
||||
for(String sampleName: sampleNames) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
logger.debug(String.format("printState():"));
|
||||
SAMRecord read = state.getRead();
|
||||
int offset = state.getReadOffset();
|
||||
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -341,7 +342,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
private void lazyLoadNextAlignmentContext() {
|
||||
while(nextAlignmentContext == null && readStates.hasNext()) {
|
||||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||
readStates.collectPendingReads();
|
||||
|
||||
int size = 0;
|
||||
int nDeletions = 0;
|
||||
|
|
@ -355,93 +356,112 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||
// and shift the position.
|
||||
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
|
||||
Map<String,ReadBackedExtendedEventPileup> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileup>();
|
||||
|
||||
int maxDeletionLength = 0;
|
||||
|
||||
for ( SAMRecordState state : readStates ) {
|
||||
if ( state.hadIndel() ) {
|
||||
size++;
|
||||
if ( state.getEventBases() == null ) {
|
||||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
||||
}
|
||||
else nInsertions++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadEventStartOffset(),
|
||||
state.getEventLength(),
|
||||
state.getEventBases()) );
|
||||
|
||||
} else {
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// this read has no indel associated with the previous position on the ref;
|
||||
// we count this read in only if it has actual bases, not N span...
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
|
||||
|
||||
// if cigar operator is D but the read has no extended event reported (that's why we ended
|
||||
// up in this branch), it means that we are currently inside a deletion that started earlier;
|
||||
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
||||
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
||||
size++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadOffset()-1,
|
||||
-1) // length=-1 --> noevent
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
||||
SAMRecordState our1stState = readStates.getFirst();
|
||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||
// are associated with the *previous* reference base
|
||||
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
||||
|
||||
for(String sampleName: sampleNames) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
|
||||
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
|
||||
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nInsertions = 0;
|
||||
nMQ0Reads = 0;
|
||||
int maxDeletionLength = 0;
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
if ( state.hadIndel() ) {
|
||||
size++;
|
||||
if ( state.getEventBases() == null ) {
|
||||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
||||
}
|
||||
else nInsertions++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadEventStartOffset(),
|
||||
state.getEventLength(),
|
||||
state.getEventBases()) );
|
||||
|
||||
} else {
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// this read has no indel associated with the previous position on the ref;
|
||||
// we count this read in only if it has actual bases, not N span...
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
|
||||
|
||||
// if cigar operator is D but the read has no extended event reported (that's why we ended
|
||||
// up in this branch), it means that we are currently inside a deletion that started earlier;
|
||||
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
||||
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
||||
size++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadOffset()-1,
|
||||
-1) // length=-1 --> noevent
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads));
|
||||
}
|
||||
}
|
||||
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
||||
// System.out.println("Indel(s) at "+loc);
|
||||
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
||||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
||||
nextAlignmentContext = new AlignmentContext(loc, new SampleSplitReadBackedExtendedEventPileup(loc, fullExtendedEventPileup));
|
||||
} else {
|
||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||
GenomeLoc location = getLocation();
|
||||
Map<String,ReadBackedPileup> fullPileup = new HashMap<String,ReadBackedPileup>();
|
||||
|
||||
// todo -- performance problem -- should be lazy, really
|
||||
GenomeLoc location = getLocation();
|
||||
for ( SAMRecordState state : readStates ) {
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
if ( filterRead(state.getRead(), location.getStart(), filters ) ) {
|
||||
discarded_bases++;
|
||||
//printStatus("Adaptor bases", discarded_adaptor_bases);
|
||||
continue;
|
||||
} else {
|
||||
observed_bases++;
|
||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||
size++;
|
||||
}
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
size++;
|
||||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
for(String sampleName: sampleNames) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
|
||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||
|
||||
// todo -- this looks like a bug w.r.t. including reads with deletion at loci -- MAD 05/27/10
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nInsertions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
if ( filterRead(state.getRead(), location.getStart(), filters ) ) {
|
||||
discarded_bases++;
|
||||
//printStatus("Adaptor bases", discarded_adaptor_bases);
|
||||
continue;
|
||||
} else {
|
||||
observed_bases++;
|
||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||
size++;
|
||||
}
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
size++;
|
||||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
|
||||
// todo -- this looks like a bug w.r.t. including reads with deletion at loci -- MAD 05/27/10
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
if( pile.size() != 0 ) fullPileup.put(sampleName,new UnifiedReadBackedPileup(location,pile,size,nDeletions,nMQ0Reads));
|
||||
}
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
// if we got reads with non-D/N over the current position, we are done
|
||||
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileup(location, pile, size, nDeletions, nMQ0Reads));
|
||||
if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new SampleSplitReadBackedPileup(location, fullPileup));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void collectPendingReads(int maxReads) {
|
||||
readStates.collectPendingReads();
|
||||
}
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
if ( readStates.isEmpty() )
|
||||
|
|
@ -475,18 +495,20 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
private void updateReadStates() {
|
||||
Iterator<SAMRecordState> it = readStates.iterator();
|
||||
while ( it.hasNext() ) {
|
||||
SAMRecordState state = it.next();
|
||||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true;
|
||||
else {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
if ( op == null ) { // we've stepped off the end of the object
|
||||
//if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart()));
|
||||
it.remove();
|
||||
for(String sampleName: sampleNames) {
|
||||
Iterator<SAMRecordState> it = readStates.iteratorForSample(sampleName);
|
||||
while ( it.hasNext() ) {
|
||||
SAMRecordState state = it.next();
|
||||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true;
|
||||
else {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
if ( op == null ) { // we've stepped off the end of the object
|
||||
//if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart()));
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -512,16 +534,18 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
return this.overflowTracker;
|
||||
}
|
||||
|
||||
private class ReadStateManager implements Iterable<SAMRecordState> {
|
||||
private class ReadStateManager {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
|
||||
private final Map<String,Collection<SAMRecord>> aggregatorsBySampleName = new HashMap<String,Collection<SAMRecord>>();
|
||||
private final ReadSelector firstReadSelector;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
|
||||
private final Map<String,Deque<SAMRecordState>> readStatesBySample = new HashMap<String,Deque<SAMRecordState>>();
|
||||
|
||||
private final int targetCoverage;
|
||||
private final int maxReadsAtLocus;
|
||||
|
||||
private final Map<String,Deque<List<SAMRecordState>>> readStatesBySample;
|
||||
private int totalReadStatesInHanger = 0;
|
||||
|
||||
/**
|
||||
|
|
@ -547,104 +571,59 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
this.targetCoverage = Integer.MAX_VALUE;
|
||||
}
|
||||
this.maxReadsAtLocus = maxReadsAtLocus;
|
||||
this.readStatesBySample = new HashMap<String,Deque<List<SAMRecordState>>>();
|
||||
|
||||
if(downsamplingMethod.type == DownsampleType.NONE) {
|
||||
aggregatorsBySampleName.put(null,new ArrayList<SAMRecord>());
|
||||
readStatesBySample.put(null,new LinkedList<List<SAMRecordState>>());
|
||||
}
|
||||
else if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||
aggregatorsBySampleName.put(null,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
||||
readStatesBySample.put(null,new LinkedList<List<SAMRecordState>>());
|
||||
}
|
||||
else {
|
||||
for(String sampleName: sampleNames) {
|
||||
aggregatorsBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(targetCoverage));
|
||||
readStatesBySample.put(sampleName,new LinkedList<List<SAMRecordState>>());
|
||||
}
|
||||
samplePartitioner = new SamplePartitioner(sampleNames);
|
||||
for(String sampleName: sampleNames)
|
||||
readStatesBySample.put(sampleName,new LinkedList<SAMRecordState>());
|
||||
|
||||
ReadSelector primaryReadSelector;
|
||||
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||
primaryReadSelector = new NRandomReadSelector(samplePartitioner,targetCoverage);
|
||||
}
|
||||
else
|
||||
primaryReadSelector = samplePartitioner;
|
||||
|
||||
firstReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordState> iterator() {
|
||||
public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private final Iterator<Iterator<List<SAMRecordState>>> sampleIterators;
|
||||
private Iterator<List<SAMRecordState>> sampleIterator;
|
||||
private List<SAMRecordState> currentAlignmentStart;
|
||||
private Iterator<SAMRecordState> alignmentStartIterator;
|
||||
private SAMRecordState nextReadState;
|
||||
|
||||
private int readsInHanger = totalReadStatesInHanger;
|
||||
|
||||
{
|
||||
List<Iterator<List<SAMRecordState>>> sampleIteratorList = new LinkedList<Iterator<List<SAMRecordState>>>();
|
||||
for(Deque<List<SAMRecordState>> hanger: readStatesBySample.values())
|
||||
sampleIteratorList.add(hanger.iterator());
|
||||
sampleIterators = sampleIteratorList.iterator();
|
||||
advance();
|
||||
}
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sampleName).iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return readsInHanger > 0;
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
advance();
|
||||
if(nextReadState==null) throw new NoSuchElementException("reader is out of elements");
|
||||
try {
|
||||
return nextReadState;
|
||||
}
|
||||
finally {
|
||||
readsInHanger--;
|
||||
nextReadState = null;
|
||||
}
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
if(alignmentStartIterator == null)
|
||||
throw new StingException("Cannot remove read -- iterator is in an invalid state.");
|
||||
alignmentStartIterator.remove();
|
||||
if(currentAlignmentStart.isEmpty())
|
||||
sampleIterator.remove();
|
||||
wrappedIterator.remove();
|
||||
totalReadStatesInHanger--;
|
||||
}
|
||||
|
||||
private void advance() {
|
||||
// nextReadState != null indicates that we haven't returned this value from the next() method yet.
|
||||
if(nextReadState != null)
|
||||
return;
|
||||
while(alignmentStartIterator!=null&&alignmentStartIterator.hasNext()) {
|
||||
nextReadState = alignmentStartIterator.next();
|
||||
}
|
||||
while(nextReadState==null&&sampleIterator!=null&&sampleIterator.hasNext()) {
|
||||
currentAlignmentStart = sampleIterator.next();
|
||||
alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null;
|
||||
nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
|
||||
}
|
||||
while(nextReadState==null&&sampleIterators.hasNext()) {
|
||||
sampleIterator = sampleIterators.next();
|
||||
currentAlignmentStart = sampleIterator!=null&&sampleIterator.hasNext() ? sampleIterator.next() : null;
|
||||
alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null;
|
||||
nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesBySample.isEmpty();
|
||||
return totalReadStatesInHanger == 0;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
int size = 0;
|
||||
for(Deque<List<SAMRecordState>> readStatesByAlignmentStart: readStatesBySample.values()) {
|
||||
for(Collection<SAMRecordState> readStates: readStatesByAlignmentStart)
|
||||
size += readStates.size();
|
||||
for(Deque<SAMRecordState> readStates: readStatesBySample.values()) {
|
||||
size += readStates.size();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
return iterator().next();
|
||||
for(String sampleName: sampleNames) {
|
||||
Deque<SAMRecordState> reads = readStatesBySample.get(sampleName);
|
||||
if(!reads.isEmpty())
|
||||
return reads.peek();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
@ -652,38 +631,36 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
if(iterator.hasNext() && readStates.size() == 0) {
|
||||
if(!iterator.hasNext())
|
||||
return;
|
||||
|
||||
if(readStates.size() == 0) {
|
||||
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
SAMRecord read = iterator.next();
|
||||
Collection<SAMRecord> aggregator = getAggregator(read.getReadGroup()!=null ? read.getReadGroup().getSample() : null);
|
||||
aggregator.add(read);
|
||||
firstReadSelector.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if(iterator.hasNext() && readIsPastCurrentPosition(iterator.peek()))
|
||||
if(readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
SAMRecord read = iterator.next();
|
||||
Collection<SAMRecord> aggregator = getAggregator(read.getReadGroup()!=null ? read.getReadGroup().getSample() : null);
|
||||
aggregator.add(read);
|
||||
firstReadSelector.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
firstReadSelector.complete();
|
||||
|
||||
int readStatesInHangerEntry = 0;
|
||||
|
||||
for(Map.Entry<String,Collection<SAMRecord>> entry: aggregatorsBySampleName.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
Collection<SAMRecord> aggregator = entry.getValue();
|
||||
for(String sampleName: sampleNames) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName);
|
||||
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator);
|
||||
aggregator.clear();
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||
|
||||
Deque<List<SAMRecordState>> hanger = readStatesBySample.get(sampleName);
|
||||
int readsInHanger = countReadsInHanger(sampleName);
|
||||
Deque<SAMRecordState> hanger = readStatesBySample.get(sampleName);
|
||||
int readsInHanger = hanger.size();
|
||||
|
||||
if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
|
||||
int readLimit = newReads.size();
|
||||
|
|
@ -692,9 +669,11 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
readLimit = maxReadsAtLocus-totalReadStatesInHanger;
|
||||
mrlViolation = true;
|
||||
}
|
||||
readStatesInHangerEntry += addReadsToHanger(hanger,newReads,readLimit,mrlViolation);
|
||||
readStatesInHangerEntry += addReadsToSample(hanger,newReads,readLimit,mrlViolation);
|
||||
}
|
||||
else {
|
||||
// TODO: implement downsampling mechanism
|
||||
/*
|
||||
Iterator<List<SAMRecordState>> backIterator = hanger.descendingIterator();
|
||||
boolean readPruned = true;
|
||||
while(readsInHanger+newReads.size()>targetCoverage && readPruned) {
|
||||
|
|
@ -714,36 +693,23 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
readsInHanger -= readsInFirstHanger.size();
|
||||
}
|
||||
|
||||
readStatesInHangerEntry += addReadsToHanger(hanger,newReads,targetCoverage-readsInHanger,false);
|
||||
readStatesInHangerEntry += addReadsToSample(hanger,newReads,targetCoverage-readsInHanger,false);
|
||||
*/
|
||||
}
|
||||
|
||||
totalReadStatesInHanger += readStatesInHangerEntry;
|
||||
}
|
||||
}
|
||||
|
||||
private Collection<SAMRecord> getAggregator(String sampleName) {
|
||||
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)
|
||||
return aggregatorsBySampleName.get(sampleName);
|
||||
else
|
||||
return aggregatorsBySampleName.get(null);
|
||||
}
|
||||
|
||||
private int countReadsInHanger(final String sampleName) {
|
||||
int count = 0;
|
||||
for(List<SAMRecordState> hangerEntry: readStatesBySample.get(sampleName)) {
|
||||
count += hangerEntry.size();
|
||||
}
|
||||
return count;
|
||||
firstReadSelector.reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add reads with the given sample name to the given hanger entry.
|
||||
* @param newHangerEntry The hanger entry to add.
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
* @param maxReads Maximum number of reads to add.
|
||||
* @return Total number of reads added.
|
||||
*/
|
||||
private int addReadsToHanger(final Deque<List<SAMRecordState>> newHangerEntry, final Collection<SAMRecord> reads, final int maxReads, boolean atMaxReadsAtLocusLimit) {
|
||||
private int addReadsToSample(final Deque<SAMRecordState> readStates, final Collection<SAMRecord> reads, final int maxReads, boolean atMaxReadsAtLocusLimit) {
|
||||
if(reads.isEmpty())
|
||||
return 0;
|
||||
|
||||
|
|
@ -752,7 +718,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
// the farthest right a read extends
|
||||
Integer rightMostEnd = -1;
|
||||
|
||||
List<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
||||
int readCount = 0;
|
||||
for(SAMRecord read: reads) {
|
||||
if(readCount <= maxReads) {
|
||||
|
|
@ -769,7 +734,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
||||
}
|
||||
}
|
||||
newHangerEntry.add(readStates);
|
||||
|
||||
if (location != null)
|
||||
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
||||
|
|
@ -780,3 +744,247 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects reads passed to it based on a criteria decided through inheritance.
|
||||
*/
|
||||
interface ReadSelector {
|
||||
/**
|
||||
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
|
||||
* @param read the read to evaluate.
|
||||
*/
|
||||
public void submitRead(SAMRecord read);
|
||||
|
||||
/**
|
||||
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
|
||||
* @param read the read previously rejected.
|
||||
*/
|
||||
public void notifyReadRejected(SAMRecord read);
|
||||
|
||||
/**
|
||||
* Signal the selector that read additions are complete.
|
||||
*/
|
||||
public void complete();
|
||||
|
||||
/**
|
||||
* Retrieve the number of reads seen by this selector so far.
|
||||
* @return number of reads seen.
|
||||
*/
|
||||
public long getNumReadsSeen();
|
||||
|
||||
/**
|
||||
* Return the number of reads accepted by this selector so far.
|
||||
* @return number of reads selected.
|
||||
*/
|
||||
public long getNumReadsSelected();
|
||||
|
||||
/**
|
||||
* Get the reads selected by this selector.
|
||||
* @return collection of reads selected by this selector.
|
||||
*/
|
||||
public Collection<SAMRecord> getSelectedReads();
|
||||
|
||||
/**
|
||||
* Reset this collection to its pre-gathered state.
|
||||
*/
|
||||
public void reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose the first N reads from the submitted set.
|
||||
*/
|
||||
class FirstNReadSelector implements ReadSelector {
|
||||
private final ReadSelector chainedSelector;
|
||||
|
||||
private final Collection<SAMRecord> selectedReads = new LinkedList<SAMRecord>();
|
||||
private final long readLimit;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public FirstNReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||
this.chainedSelector = chainedSelector;
|
||||
this.readLimit = readLimit;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
if(readsSeen > readLimit) {
|
||||
selectedReads.add(read);
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.submitRead(read);
|
||||
}
|
||||
else
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.complete();
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return selectedReads.size();
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return selectedReads;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
selectedReads.clear();
|
||||
readsSeen = 0;
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Select N reads randomly from the input stream.
|
||||
*/
|
||||
class NRandomReadSelector implements ReadSelector {
|
||||
private final ReservoirDownsampler<SAMRecord> reservoir;
|
||||
private final ReadSelector chainedSelector;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||
this.reservoir = new ReservoirDownsampler<SAMRecord>((int)readLimit);
|
||||
this.chainedSelector = chainedSelector;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
SAMRecord displaced = reservoir.add(read);
|
||||
if(displaced != null && chainedSelector != null)
|
||||
chainedSelector.notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
for(SAMRecord read: reservoir.getDownsampledContents())
|
||||
chainedSelector.submitRead(read);
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.complete();
|
||||
}
|
||||
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return reservoir.size();
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reservoir.getDownsampledContents();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reservoir.clear();
|
||||
if(chainedSelector != null)
|
||||
chainedSelector.reset();
|
||||
}
|
||||
}
|
||||
|
||||
class SamplePartitioner implements ReadSelector {
|
||||
private final Map<String,SampleStorage> readsBySample;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public SamplePartitioner(Collection<String> sampleNames) {
|
||||
readsBySample = new HashMap<String,SampleStorage>();
|
||||
for(String sampleName: sampleNames)
|
||||
readsBySample.put(sampleName,new SampleStorage());
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null;
|
||||
if(readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submitRead(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null;
|
||||
if(readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
|
||||
}
|
||||
|
||||
public ReadSelector getSelectedReads(String sampleName) {
|
||||
if(!readsBySample.containsKey(sampleName))
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
return readsBySample.get(sampleName);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for(SampleStorage storage: readsBySample.values())
|
||||
storage.reset();
|
||||
readsSeen = 0;
|
||||
}
|
||||
|
||||
private class SampleStorage implements ReadSelector {
|
||||
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||
private long readsSeen = 0;
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
reads.add(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reads;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reads.clear();
|
||||
readsSeen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -33,10 +33,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -399,7 +396,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
||||
// System.out.println("Indel(s) at "+loc);
|
||||
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
||||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
||||
nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
||||
} else {
|
||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||
|
||||
|
|
@ -430,7 +427,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
GenomeLoc loc = getLocation();
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
// if we got reads with non-D/N over the current position, we are done
|
||||
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
|
||||
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -183,7 +184,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
AlignmentContext locus = new AlignmentContext(site, new ReadBackedPileup(site));
|
||||
AlignmentContext locus = new AlignmentContext(site, new UnifiedReadBackedPileup(site));
|
||||
|
||||
// update the number of duplicate sets we've seen
|
||||
TraversalStatistics.nRecords++;
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
|
|
@ -92,7 +93,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
|
||||
AlignmentContext ac = new AlignmentContext(site, new UnifiedReadBackedPileup(site), nSkipped);
|
||||
M x = walker.map(null, null, ac);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -265,7 +266,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
}
|
||||
}
|
||||
|
||||
return new ReadBackedPileup(pileup.getLocation(), reads, offsets);
|
||||
return new UnifiedReadBackedPileup(pileup.getLocation(), reads, offsets);
|
||||
}
|
||||
|
||||
private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) {
|
||||
|
|
@ -278,7 +279,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
}
|
||||
}
|
||||
|
||||
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||
return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -158,7 +159,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
|
|||
if ( readGroup != null && samples.contains(readGroup.getSample()) )
|
||||
newPileup.add(p);
|
||||
}
|
||||
return new AlignmentContext(pileup.getLocation(), new ReadBackedPileup(pileup.getLocation(), newPileup));
|
||||
return new AlignmentContext(pileup.getLocation(), new UnifiedReadBackedPileup(pileup.getLocation(), newPileup));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,10 +45,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
|||
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -241,7 +238,7 @@ public class UnifiedGenotyperEngine {
|
|||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||
return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||
}
|
||||
|
||||
// filter based on maximum mismatches and bad mates
|
||||
|
|
@ -253,7 +250,7 @@ public class UnifiedGenotyperEngine {
|
|||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
return new ReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup);
|
||||
return new UnifiedReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup);
|
||||
}
|
||||
|
||||
private static boolean isValidDeletionFraction(double d) {
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import java.util.*;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ReservoirDownsampler<T> implements Collection<T> {
|
||||
public class ReservoirDownsampler<T> {
|
||||
/**
|
||||
* Create a random number generator with a random, but reproducible, seed.
|
||||
*/
|
||||
|
|
@ -40,31 +40,35 @@ public class ReservoirDownsampler<T> implements Collection<T> {
|
|||
this.maxElements = maxElements;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean add(T element) {
|
||||
/**
|
||||
* Returns the eliminated element.
|
||||
* @param element Eliminated element; null if no element has been eliminated.
|
||||
* @return
|
||||
*/
|
||||
public T add(T element) {
|
||||
if(maxElements <= 0)
|
||||
return false;
|
||||
return element;
|
||||
else if(reservoir.size() < maxElements) {
|
||||
reservoir.add(element);
|
||||
return true;
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
// Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry.
|
||||
int slot = random.nextInt(maxElements);
|
||||
if(slot >= 0 && slot < maxElements) {
|
||||
T displaced = reservoir.get(slot);
|
||||
reservoir.set(slot,element);
|
||||
return true;
|
||||
return displaced;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
return element;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean addAll(Collection<? extends T> elements) {
|
||||
boolean added = false;
|
||||
for(T element: elements)
|
||||
added |= add(element);
|
||||
added |= (add(element) != null);
|
||||
return added;
|
||||
}
|
||||
|
||||
|
|
@ -73,62 +77,51 @@ public class ReservoirDownsampler<T> implements Collection<T> {
|
|||
* @return The downsampled contents of this reservoir.
|
||||
*/
|
||||
public Collection<T> getDownsampledContents() {
|
||||
return (Collection<T>)reservoir.clone();
|
||||
return reservoir;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
reservoir.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return reservoir.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return reservoir.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<T> iterator() {
|
||||
return reservoir.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Object o) {
|
||||
return reservoir.contains(o);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsAll(Collection<?> elements) {
|
||||
return reservoir.containsAll(elements);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean retainAll(Collection<?> elements) {
|
||||
return reservoir.retainAll(elements);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(Object o) {
|
||||
return reservoir.remove(o);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean removeAll(Collection<?> elements) {
|
||||
return reservoir.removeAll(elements);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] toArray() {
|
||||
Object[] contents = new Object[reservoir.size()];
|
||||
reservoir.toArray(contents);
|
||||
return contents;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T[] toArray(T[] array) {
|
||||
return reservoir.toArray(array);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -114,7 +115,7 @@ public class DupUtils {
|
|||
|
||||
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0));
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(loc, duplicates, readOffset);
|
||||
ReadBackedPileup pileup = new UnifiedReadBackedPileup(loc, duplicates, readOffset);
|
||||
|
||||
final boolean debug = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Map;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Merges multiple pileups broken down by sample.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
class MergingPileupElementIterator implements Iterator<PileupElement> {
|
||||
private final PriorityQueue<PeekableIterator<? extends PileupElement>> perSampleIterators;
|
||||
|
||||
public MergingPileupElementIterator(Map<String,? extends Object> pileupsPerSample) {
|
||||
perSampleIterators = new PriorityQueue<PeekableIterator<? extends PileupElement>>(pileupsPerSample.size(),new PileupElementIteratorComparator());
|
||||
for(Object pileupForSample: pileupsPerSample.values()) {
|
||||
if(pileupForSample instanceof ReadBackedPileup) {
|
||||
ReadBackedPileup pileup = (ReadBackedPileup)pileupForSample;
|
||||
if(pileup.size() != 0)
|
||||
perSampleIterators.add(new PeekableIterator<PileupElement>(pileup.iterator()));
|
||||
}
|
||||
else if(pileupForSample instanceof ReadBackedExtendedEventPileup) {
|
||||
ReadBackedExtendedEventPileup pileup = (ReadBackedExtendedEventPileup)pileupForSample;
|
||||
if(pileup.size() != 0)
|
||||
perSampleIterators.add(new PeekableIterator<ExtendedEventPileupElement>(pileup.iterator()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return !perSampleIterators.isEmpty();
|
||||
}
|
||||
|
||||
public PileupElement next() {
|
||||
PeekableIterator<? extends PileupElement> currentIterator = perSampleIterators.remove();
|
||||
PileupElement current = currentIterator.next();
|
||||
if(currentIterator.hasNext())
|
||||
perSampleIterators.add(currentIterator);
|
||||
return current;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Cannot remove from a merging iterator.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two peekable iterators consisting of pileup elements.
|
||||
*/
|
||||
private class PileupElementIteratorComparator implements Comparator<PeekableIterator<? extends PileupElement>> {
|
||||
public int compare(PeekableIterator<? extends PileupElement> lhs, PeekableIterator<? extends PileupElement> rhs) {
|
||||
return rhs.peek().getOffset() - lhs.peek().getOffset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,426 +1,141 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
 * A pileup of extended (indel) events -- insertions, deletions, and no-event
 * observations -- at a single genomic location, with summary counts (size,
 * insertions, deletions, longest deletion, MQ0 reads) cached at construction.
 *
 * @author asivache
 * @since Dec 29, 2009
|
||||
*/
|
||||
public class ReadBackedExtendedEventPileup implements Iterable<ExtendedEventPileupElement> {
|
||||
private GenomeLoc loc = null;
|
||||
private ArrayList<ExtendedEventPileupElement> pileup = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nInsertions = 0;
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
/**
 * Create a new read-backed extended-event pileup at loc with no aligned reads.
 *
 * @param loc the location of this pileup; must not be null
 */
public ReadBackedExtendedEventPileup(GenomeLoc loc ) {
    this(loc, new ArrayList<ExtendedEventPileupElement>(0));
}
||||
|
||||
/**
 * Create a new read-backed extended-event pileup at loc from the given elements.
 * This lower-level constructor assumes pileup is well-formed and merely keeps a
 * pointer to it -- the caller must not modify pileup afterwards.  Summary counts
 * are computed here with a single pass over the elements.
 *
 * @param loc the location of this pileup; must not be null
 * @param pileup the pileup elements; must not be null (may be empty)
 */
public ReadBackedExtendedEventPileup(GenomeLoc loc, ArrayList<ExtendedEventPileupElement> pileup ) {
    if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
    if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");

    this.loc = loc;
    this.pileup = pileup;

    calculatedCachedData();
}
|
||||
|
||||
/**
 * Optimization of the above constructor where all of the cached summary data is
 * provided up front, avoiding the extra pass over the pileup.  The counts are
 * trusted as-is; callers must supply values consistent with pileup.
 *
 * @param loc the location of this pileup; must not be null
 * @param pileup the pileup elements; must not be null
 * @param size number of elements in pileup
 * @param maxDeletionLength length of the longest deletion observed at this site
 * @param nInsertions number of insertion events in pileup
 * @param nDeletions number of deletion events in pileup
 * @param nMQ0Reads number of mapping-quality-zero reads in pileup
 */
public ReadBackedExtendedEventPileup(GenomeLoc loc, ArrayList<ExtendedEventPileupElement> pileup, int size,
                                     int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) {
    if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
    if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");

    this.loc = loc;
    this.pileup = pileup;
    this.size = size;
    this.maxDeletionLength = maxDeletionLength;
    this.nDeletions = nDeletions;
    this.nInsertions = nInsertions;
    this.nMQ0Reads = nMQ0Reads;
}
||||
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
* sizes, nDeletion, etc. over and over potentially.
|
||||
*/
|
||||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nInsertions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( ExtendedEventPileupElement p : this ) {
|
||||
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
|
||||
} else {
|
||||
if ( p.isInsertion() ) nInsertions++;
|
||||
}
|
||||
|
||||
if ( p.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Special 'constructors'
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
for ( ExtendedEventPileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() > 0 ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return new ReadBackedExtendedEventPileup(loc, filteredPileup);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
|
||||
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
|
||||
for ( ExtendedEventPileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
|
||||
return new ReadBackedExtendedEventPileup(loc, filteredPileup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(pileup.size())) )
|
||||
i++;
|
||||
}
|
||||
|
||||
Iterator positionIter = positions.iterator();
|
||||
ArrayList<ExtendedEventPileupElement> downsampledPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
downsampledPileup.add(pileup.get(nextReadToKeep));
|
||||
}
|
||||
|
||||
return new ReadBackedExtendedEventPileup(getLocation(), downsampledPileup);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
 * The best way to access the elements when you only care about the events in
 * the pileup:
 *
 *   for ( ExtendedEventPileupElement p : this ) { doSomething(p); }
 *
 * Provides efficient iteration of the data by delegating directly to the
 * backing list's iterator.
 *
 * @return an iterator over the pileup's elements, in backing-list order
 */
public Iterator<ExtendedEventPileupElement> iterator() {
    return pileup.iterator();
}
|
||||
|
||||
|
||||
/**
 * Returns the number of deletion events in this pileup (cached at construction).
 *
 * @return count of elements flagged as deletions
 */
public int getNumberOfDeletions() {
    return nDeletions;
}
|
||||
|
||||
/**
 * Returns the number of insertion events in this pileup (cached at construction).
 *
 * @return count of elements flagged as insertions
 */
public int getNumberOfInsertions() {
    return nInsertions;
}
|
||||
|
||||
|
||||
/**
 * Returns the length of the longest deletion observed at the site this pileup
 * is associated with (NOTE: by convention, both insertions and deletions are
 * associated with the genomic location immediately before the actual event).
 * If there are no deletions at the site, returns 0.
 *
 * @return length of the longest deletion, or 0 when no deletions are present
 */
public int getMaxDeletionLength() {
    return maxDeletionLength;
}
|
||||
/**
 * Returns the number of mapping-quality-zero reads in this pileup (cached at
 * construction).
 *
 * @return count of elements whose read has mapping quality 0
 */
public int getNumberOfMappingQualityZeroReads() {
    return nMQ0Reads;
}
|
||||
|
||||
// public int getNumberOfDeletions() {
|
||||
// int n = 0;
|
||||
//
|
||||
// for ( int i = 0; i < size(); i++ ) {
|
||||
// if ( getOffsets().get(i) != -1 ) { n++; }
|
||||
// }
|
||||
//
|
||||
// return n;
|
||||
// }
|
||||
|
||||
/**
 * @return the number of elements in this pileup (cached at construction)
 */
public int size() {
    return size;
}
|
||||
|
||||
/**
 * @return the genomic location this pileup is associated with
 */
public GenomeLoc getLocation() {
    return loc;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Render this pileup as a single pileup-format line: genomic position, the
 * literal reference "base" 'E' (for E(xtended) event), and one event symbol
 * per read.
 *
 * @return a one-line pileup-style representation of this pileup
 */
public String getShortPileupString() {
    // In the pileup format, each extended event line has genomic position (chromosome name and offset),
    // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing
    // insertion, deletion or no-event, respectively.
    return String.format("%s %s E %s",
            getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
            new String(getEvents()) );
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getEvents() {
|
||||
byte[] v = new byte[size()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this ) {
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION: v[i] = 'I'; break;
|
||||
case DELETION: v[i] = 'D'; break;
|
||||
case NOEVENT: v[i] = '.'; break;
|
||||
default: throw new StingException("Unknown event type encountered: "+e.getType());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/** A shortcut for getEventStringsWithCounts(null);
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts() {
|
||||
return getEventStringsWithCounts(null);
|
||||
}
|
||||
|
||||
/** Returns String representation of all distinct extended events (indels) at the site along with
|
||||
* observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
|
||||
* deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
|
||||
* deleted sequence will be used in the string representation (e.g. "-AAC").
|
||||
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
|
||||
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
|
||||
* @return list of distinct events; first element of a pair is a string representation of the event, second element
|
||||
* gives the number of reads, in which that event was observed
|
||||
*/
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
|
||||
Map<String, Integer> events = new HashMap<String,Integer>();
|
||||
|
||||
for ( ExtendedEventPileupElement e : this ) {
|
||||
Integer cnt;
|
||||
String indel = null;
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION:
|
||||
indel = "+"+e.getEventBases();
|
||||
break;
|
||||
case DELETION:
|
||||
indel = getDeletionString(e.getEventLength(),refBases);
|
||||
break;
|
||||
case NOEVENT: continue;
|
||||
default: throw new StingException("Unknown event type encountered: "+e.getType());
|
||||
}
|
||||
|
||||
cnt = events.get(indel);
|
||||
if ( cnt == null ) events.put(indel,1);
|
||||
else events.put(indel,cnt.intValue()+1);
|
||||
}
|
||||
|
||||
List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
|
||||
for ( Map.Entry<String,Integer> m : events.entrySet() ) {
|
||||
eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
|
||||
}
|
||||
return eventList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds string representation of the deletion event. If refBases is null, the representation will be
|
||||
* "<length>D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC")
|
||||
* will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted
|
||||
* base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the
|
||||
* deletion length (i.e. size of refBase must be at least length+1).
|
||||
* @param length
|
||||
* @param refBases
|
||||
* @return
|
||||
*/
|
||||
private String getDeletionString(int length, byte[] refBases) {
|
||||
if ( refBases == null ) {
|
||||
return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D"
|
||||
} else {
|
||||
return "-"+new String(refBases,1,length).toUpperCase();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this ) { v[i++] = (byte)e.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Private functions for printing pileups
|
||||
//
|
||||
private String getMappingQualsString() {
|
||||
return quals2String(getMappingQuals());
|
||||
}
|
||||
|
||||
private static String quals2String( byte[] quals ) {
|
||||
StringBuilder qualStr = new StringBuilder();
|
||||
for ( int qual : quals ) {
|
||||
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
||||
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
||||
qualStr.append(qualChar);
|
||||
}
|
||||
|
||||
return qualStr.toString();
|
||||
}
|
||||
|
||||
}
|
||||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* A clean interface for working with extended event pileups.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPileupElement> {
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads();
|
||||
|
||||
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ );
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage);
|
||||
|
||||
/**
|
||||
* Returns the number of deletion events in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getNumberOfDeletions();
|
||||
|
||||
/**
|
||||
* Returns the number of insertion events in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getNumberOfInsertions();
|
||||
|
||||
/** Returns the length of the longest deletion observed at the site this
|
||||
* pileup is associated with (NOTE: by convention, both insertions and deletions
|
||||
* are associated with genomic location immediately before the actual event). If
|
||||
* there are no deletions at the site, returns 0.
|
||||
* @return
|
||||
*/
|
||||
public int getMaxDeletionLength();
|
||||
|
||||
/**
|
||||
* Returns the number of mapping quality zero reads in this pileup.
|
||||
* @return
|
||||
*/
|
||||
public int getNumberOfMappingQualityZeroReads();
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
public int size();
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
public String getShortPileupString();
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads();
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> getOffsets();
|
||||
|
||||
/**
|
||||
* Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getEvents();
|
||||
|
||||
/** A shortcut for getEventStringsWithCounts(null);
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts();
|
||||
|
||||
/** Returns String representation of all distinct extended events (indels) at the site along with
|
||||
* observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
|
||||
* deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
|
||||
* deleted sequence will be used in the string representation (e.g. "-AAC").
|
||||
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
|
||||
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
|
||||
* @return list of distinct events; first element of a pair is a string representation of the event, second element
|
||||
* gives the number of reads, in which that event was observed
|
||||
*/
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases);
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
public byte[] getMappingQuals();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,157 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Version two file implementing pileups of bases in reads at a locus.
|
||||
* A data retrieval interface for accessing parts of the pileup.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||
private GenomeLoc loc = null;
|
||||
private ArrayList<PileupElement> pileup = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This pileup will contain a list, in order of the reads, of the piled bases at
|
||||
* reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
|
||||
* go changing the reads.
|
||||
*
|
||||
* @param loc
|
||||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
this(loc, readsOffsets2Pileup(reads, offsets));
|
||||
}
|
||||
|
||||
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
|
||||
this(loc, readsOffsets2Pileup(reads, offset));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc without any aligned reads
|
||||
*
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc ) {
|
||||
this(loc, new ArrayList<PileupElement>(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
|
||||
* pointer to pileup. Don't go changing the data in pileup.
|
||||
*
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
|
||||
calculatedCachedData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
this.size = size;
|
||||
this.nDeletions = nDeletions;
|
||||
this.nMQ0Reads = nMQ0Reads;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
* sizes, nDeletion, etc. over and over potentially.
|
||||
*/
|
||||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
}
|
||||
if ( p.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Helper routine for converting reads and offset lists to a PileupElement list.
|
||||
*
|
||||
* @param reads
|
||||
* @param offsets
|
||||
* @return
|
||||
*/
|
||||
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
|
||||
if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2");
|
||||
if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!");
|
||||
|
||||
ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) );
|
||||
}
|
||||
|
||||
return pileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine for converting reads and a single offset to a PileupElement list.
|
||||
*
|
||||
* @param reads
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
|
||||
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
|
||||
if ( offset < 0 ) throw new StingException("Illegal offset < 0 ReadBackedPileup2");
|
||||
|
||||
ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
pileup.add( new PileupElement( reads.get(i), offset ) );
|
||||
}
|
||||
|
||||
return pileup;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Special 'constructors'
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
public interface ReadBackedPileup extends Iterable<PileupElement> {
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
|
|
@ -159,20 +44,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||
if ( getNumberOfDeletions() > 0 ) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( !p.isDeletion() ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return new ReadBackedPileup(loc, filteredPileup);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
public ReadBackedPileup getPileupWithoutDeletions();
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup where only one read from an overlapping read
|
||||
|
|
@ -182,32 +54,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
*
|
||||
* @return the newly filtered pileup
|
||||
*/
|
||||
public ReadBackedPileup getOverlappingFragementFilteredPileup() {
|
||||
Map<String, PileupElement> filteredPileup = new HashMap<String, PileupElement>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
String readName = p.getRead().getReadName();
|
||||
|
||||
// if we've never seen this read before, life is good
|
||||
if (!filteredPileup.containsKey(readName)) {
|
||||
filteredPileup.put(readName, p);
|
||||
} else {
|
||||
PileupElement existing = filteredPileup.get(readName);
|
||||
|
||||
// if the reads disagree at this position, throw them both out. Otherwise
|
||||
// keep the element with the higher quality score
|
||||
if (existing.getBase() != p.getBase()) {
|
||||
filteredPileup.remove(readName);
|
||||
} else {
|
||||
if (existing.getQual() < p.getQual()) {
|
||||
filteredPileup.put(readName, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new ReadBackedPileup(loc, new ArrayList<PileupElement>(filteredPileup.values()));
|
||||
}
|
||||
public ReadBackedPileup getOverlappingFragmentFilteredPileup();
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
|
|
@ -216,20 +63,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() > 0 ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return new ReadBackedPileup(loc, filteredPileup);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads();
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||
|
|
@ -237,35 +71,21 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
|
||||
return new ReadBackedPileup(loc, filteredPileup);
|
||||
}
|
||||
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ );
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minBaseQ
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) {
|
||||
return getBaseAndMappingFilteredPileup(minBaseQ, -1);
|
||||
}
|
||||
|
||||
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ );
|
||||
|
||||
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) {
|
||||
return getBaseAndMappingFilteredPileup(-1, minMapQ);
|
||||
}
|
||||
public ReadBackedPileup getMappingFilteredPileup( int minMapQ );
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
|
|
@ -273,97 +93,36 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(pileup.size())) )
|
||||
i++;
|
||||
}
|
||||
|
||||
Iterator positionIter = positions.iterator();
|
||||
ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();
|
||||
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
downsampledPileup.add(pileup.get(nextReadToKeep));
|
||||
}
|
||||
|
||||
return new ReadBackedPileup(getLocation(), downsampledPileup);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
public ReadBackedPileup getDownsampledPileup(int desiredCoverage);
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||
*
|
||||
* for (PileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
* Gets the particular subset of this pileup with the given sample name.
|
||||
* @param sampleName Name of the sample to use.
|
||||
* @return A subset of this pileup containing only reads with the given sample.
|
||||
*/
|
||||
public Iterator<PileupElement> iterator() {
|
||||
return pileup.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care not only about bases and quals in the pileup
|
||||
* but also need access to the index of the pileup element in the pile.
|
||||
*
|
||||
* for (ExtendedPileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
|
||||
// todo -- reimplement efficiently
|
||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
||||
int i = 0;
|
||||
for ( PileupElement pile : this ) {
|
||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
||||
}
|
||||
|
||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
||||
}
|
||||
public ReadBackedPileup getPileupForSample(String sampleName);
|
||||
|
||||
// todo -- delete or make private
|
||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator();
|
||||
|
||||
/**
|
||||
* Simple useful routine to count the number of deletion bases in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getNumberOfDeletions() {
|
||||
return nDeletions;
|
||||
}
|
||||
public int getNumberOfDeletions();
|
||||
|
||||
public int getNumberOfMappingQualityZeroReads() {
|
||||
return nMQ0Reads;
|
||||
}
|
||||
public int getNumberOfMappingQualityZeroReads();
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
public int size();
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
|
||||
|
|
@ -371,143 +130,49 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
public int[] getBaseCounts() {
|
||||
int[] counts = new int[4];
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() ) {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||
if (index != -1)
|
||||
counts[index]++;
|
||||
}
|
||||
}
|
||||
|
||||
return counts;
|
||||
}
|
||||
public int[] getBaseCounts();
|
||||
|
||||
/**
|
||||
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
|
||||
* @return
|
||||
*/
|
||||
public boolean hasSecondaryBases() {
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) )
|
||||
return true;
|
||||
}
|
||||
public boolean hasSecondaryBases();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public String getPileupString(char ref) {
|
||||
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
|
||||
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
|
||||
return String.format("%s %s %c %s %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
ref, // reference base
|
||||
new String(getBases()),
|
||||
getQualsString());
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
public String getPileupString(char ref);
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
public List<SAMRecord> getReads();
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
public List<Integer> getOffsets();
|
||||
|
||||
/**
|
||||
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
||||
return v;
|
||||
}
|
||||
public byte[] getBases();
|
||||
|
||||
/**
|
||||
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getSecondaryBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
||||
return v;
|
||||
}
|
||||
public byte[] getSecondaryBases();
|
||||
|
||||
/**
|
||||
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
||||
return v;
|
||||
}
|
||||
/**
|
||||
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getQuals();
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Private functions for printing pileups
|
||||
//
|
||||
private String getMappingQualsString() {
|
||||
return quals2String(getMappingQuals());
|
||||
}
|
||||
|
||||
private static String quals2String( byte[] quals ) {
|
||||
StringBuilder qualStr = new StringBuilder();
|
||||
for ( int qual : quals ) {
|
||||
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
||||
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
||||
qualStr.append(qualChar);
|
||||
}
|
||||
|
||||
return qualStr.toString();
|
||||
}
|
||||
|
||||
private String getQualsString() {
|
||||
return quals2String(getQuals());
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder();
|
||||
s.append(getLocation());
|
||||
s.append(": ");
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
s.append((char)p.getBase());
|
||||
}
|
||||
|
||||
return s.toString();
|
||||
}
|
||||
public byte[] getMappingQuals();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,348 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Extended event pileups, split out by sample.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SampleSplitReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup {
|
||||
private GenomeLoc loc = null;
|
||||
private Map<String,ReadBackedExtendedEventPileup> pileupBySample = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nInsertions = 0;
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc) {
|
||||
this(loc,new HashMap<String,ReadBackedExtendedEventPileup>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
* @param loc Location of this pileup.
|
||||
* @param pileup Pileup data, split by sample name.
|
||||
*/
|
||||
public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc, Map<String,ReadBackedExtendedEventPileup> pileup) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileupBySample = pileup;
|
||||
for(ReadBackedExtendedEventPileup pileupBySample: pileup.values()) {
|
||||
this.size += pileupBySample.size();
|
||||
this.nDeletions += pileupBySample.getNumberOfDeletions();
|
||||
this.nMQ0Reads += pileupBySample.getNumberOfMappingQualityZeroReads();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void addPileup(final String sampleName, ReadBackedExtendedEventPileup pileup) {
|
||||
pileupBySample.put(sampleName,pileup);
|
||||
|
||||
size += pileup.size();
|
||||
nDeletions += pileup.getNumberOfDeletions();
|
||||
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
SampleSplitReadBackedExtendedEventPileup filteredPileup = new SampleSplitReadBackedExtendedEventPileup(loc);
|
||||
|
||||
for (Map.Entry<String,ReadBackedExtendedEventPileup> entry: pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedExtendedEventPileup pileup = entry.getValue();
|
||||
filteredPileup.addPileup(sampleName,pileup.getPileupWithoutMappingQualityZeroReads());
|
||||
}
|
||||
|
||||
return filteredPileup;
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
|
||||
SampleSplitReadBackedExtendedEventPileup filteredPileup = new SampleSplitReadBackedExtendedEventPileup(loc);
|
||||
|
||||
for (Map.Entry<String,ReadBackedExtendedEventPileup> entry: pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedExtendedEventPileup pileup = entry.getValue();
|
||||
filteredPileup.addPileup(sampleName,pileup.getMappingFilteredPileup(minMapQ));
|
||||
}
|
||||
|
||||
return filteredPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(size)) )
|
||||
i++;
|
||||
}
|
||||
|
||||
SampleSplitReadBackedExtendedEventPileup downsampledPileup = new SampleSplitReadBackedExtendedEventPileup(getLocation());
|
||||
int current = 0;
|
||||
|
||||
for(Map.Entry<String,ReadBackedExtendedEventPileup> entry: this.pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedExtendedEventPileup pileup = entry.getValue();
|
||||
|
||||
List<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
for(ExtendedEventPileupElement p: pileup) {
|
||||
if(positions.contains(current))
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
|
||||
if(!filteredPileup.isEmpty())
|
||||
downsampledPileup.addPileup(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,filteredPileup));
|
||||
|
||||
current++;
|
||||
}
|
||||
|
||||
return downsampledPileup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<ExtendedEventPileupElement> iterator() {
|
||||
return new ExtendedEventCastingIterator(new MergingPileupElementIterator(pileupBySample));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of deletion events in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfDeletions() {
|
||||
return nDeletions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of insertion events in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfInsertions() {
|
||||
return nInsertions;
|
||||
}
|
||||
|
||||
/** Returns the length of the longest deletion observed at the site this
|
||||
* pileup is associated with (NOTE: by convention, both insertions and deletions
|
||||
* are associated with genomic location immediately before the actual event). If
|
||||
* there are no deletions at the site, returns 0.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getMaxDeletionLength() {
|
||||
int maxDeletionLength = 0;
|
||||
for(ReadBackedExtendedEventPileup pileup: pileupBySample.values())
|
||||
maxDeletionLength = Math.max(maxDeletionLength,pileup.getMaxDeletionLength());
|
||||
return maxDeletionLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of mapping quality zero reads in this pileup.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfMappingQualityZeroReads() {
|
||||
return nMQ0Reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getShortPileupString() {
|
||||
// In the pileup format, each extended event line has genomic position (chromosome name and offset),
|
||||
// reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing
|
||||
// insertion, deletion or no-event, respectively.
|
||||
return String.format("%s %s E %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
new String(getEvents()) );
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getEvents() {
|
||||
byte[] v = new byte[size()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this ) {
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION: v[i] = 'I'; break;
|
||||
case DELETION: v[i] = 'D'; break;
|
||||
case NOEVENT: v[i] = '.'; break;
|
||||
default: throw new StingException("Unknown event type encountered: "+e.getType());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/** A shortcut for getEventStringsWithCounts(null);
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts() {
|
||||
return getEventStringsWithCounts(null);
|
||||
}
|
||||
|
||||
/** Returns String representation of all distinct extended events (indels) at the site along with
|
||||
* observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
|
||||
* deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
|
||||
* deleted sequence will be used in the string representation (e.g. "-AAC").
|
||||
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
|
||||
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
|
||||
* @return list of distinct events; first element of a pair is a string representation of the event, second element
|
||||
* gives the number of reads, in which that event was observed
|
||||
*/
|
||||
@Override
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
|
||||
Map<String, Integer> events = new HashMap<String,Integer>();
|
||||
|
||||
for ( ExtendedEventPileupElement e : this ) {
|
||||
Integer cnt;
|
||||
String indel = null;
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION:
|
||||
indel = "+"+e.getEventBases();
|
||||
break;
|
||||
case DELETION:
|
||||
indel = UnifiedReadBackedExtendedEventPileup.getDeletionString(e.getEventLength(),refBases);
|
||||
break;
|
||||
case NOEVENT: continue;
|
||||
default: throw new StingException("Unknown event type encountered: "+e.getType());
|
||||
}
|
||||
|
||||
cnt = events.get(indel);
|
||||
if ( cnt == null ) events.put(indel,1);
|
||||
else events.put(indel,cnt.intValue()+1);
|
||||
}
|
||||
|
||||
List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
|
||||
for ( Map.Entry<String,Integer> m : events.entrySet() ) {
|
||||
eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
|
||||
}
|
||||
return eventList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this ) { v[i++] = (byte)e.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
private class ExtendedEventCastingIterator implements Iterator<ExtendedEventPileupElement> {
|
||||
private final Iterator<PileupElement> wrappedIterator;
|
||||
|
||||
public ExtendedEventCastingIterator(Iterator<PileupElement> iterator) {
|
||||
this.wrappedIterator = iterator;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return wrappedIterator.hasNext(); }
|
||||
public ExtendedEventPileupElement next() { return (ExtendedEventPileupElement)wrappedIterator.next(); }
|
||||
public void remove() { wrappedIterator.remove(); }
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,414 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
|
||||
|
||||
/**
|
||||
* A read-backed pileup that internally divides the dataset based
|
||||
* on sample, for super efficient per-sample access.
|
||||
*
|
||||
* TODO: there are a few functions that are shared between UnifiedReadBackedPileup and SampleSplitReadBackedPileup.
|
||||
* TODO: refactor these into a common class.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SampleSplitReadBackedPileup implements ReadBackedPileup {
|
||||
private GenomeLoc loc = null;
|
||||
private Map<String,ReadBackedPileup> pileupBySample = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
public SampleSplitReadBackedPileup(GenomeLoc loc) {
|
||||
this(loc,new HashMap<String,ReadBackedPileup>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
* @param loc Location of this pileup.
|
||||
* @param pileup Pileup data, split by sample name.
|
||||
*/
|
||||
public SampleSplitReadBackedPileup(GenomeLoc loc, Map<String,ReadBackedPileup> pileup) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileupBySample = pileup;
|
||||
for(ReadBackedPileup pileupBySample: pileup.values()) {
|
||||
this.size += pileupBySample.size();
|
||||
this.nDeletions += pileupBySample.getNumberOfDeletions();
|
||||
this.nMQ0Reads += pileupBySample.getNumberOfMappingQualityZeroReads();
|
||||
}
|
||||
}
|
||||
|
||||
private void addPileup(final String sampleName, ReadBackedPileup pileup) {
|
||||
pileupBySample.put(sampleName,pileup);
|
||||
|
||||
size += pileup.size();
|
||||
nDeletions += pileup.getNumberOfDeletions();
|
||||
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no deletions in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||
if ( getNumberOfDeletions() > 0 ) {
|
||||
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
|
||||
|
||||
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedPileup pileup = entry.getValue();
|
||||
filteredPileup.addPileup(sampleName,pileup.getPileupWithoutDeletions());
|
||||
}
|
||||
|
||||
return filteredPileup;
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup where only one read from an overlapping read
|
||||
* pair is retained. If the two reads in question disagree to their basecall,
|
||||
* neither read is retained. If they agree on the base, the read with the higher
|
||||
* quality observation is retained
|
||||
*
|
||||
* @return the newly filtered pileup
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
|
||||
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
|
||||
|
||||
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedPileup pileup = entry.getValue();
|
||||
filteredPileup.addPileup(sampleName,pileup.getOverlappingFragmentFilteredPileup());
|
||||
}
|
||||
|
||||
return filteredPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
|
||||
|
||||
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedPileup pileup = entry.getValue();
|
||||
filteredPileup.addPileup(sampleName,pileup.getPileupWithoutMappingQualityZeroReads());
|
||||
}
|
||||
|
||||
return filteredPileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minBaseQ
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
|
||||
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
|
||||
|
||||
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedPileup pileup = entry.getValue();
|
||||
filteredPileup.addPileup(sampleName,pileup.getBaseAndMappingFilteredPileup(minBaseQ,minMapQ));
|
||||
}
|
||||
|
||||
return filteredPileup;
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minBaseQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) {
|
||||
return getBaseAndMappingFilteredPileup(minBaseQ, -1);
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) {
|
||||
return getBaseAndMappingFilteredPileup(-1, minMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(size)) )
|
||||
i++;
|
||||
}
|
||||
|
||||
SampleSplitReadBackedPileup downsampledPileup = new SampleSplitReadBackedPileup(getLocation());
|
||||
int current = 0;
|
||||
|
||||
for(Map.Entry<String,ReadBackedPileup> entry: this.pileupBySample.entrySet()) {
|
||||
String sampleName = entry.getKey();
|
||||
ReadBackedPileup pileup = entry.getValue();
|
||||
|
||||
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
for(PileupElement p: pileup) {
|
||||
if(positions.contains(current))
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
|
||||
if(!filteredPileup.isEmpty())
|
||||
downsampledPileup.addPileup(sampleName,new UnifiedReadBackedPileup(loc,filteredPileup));
|
||||
|
||||
current++;
|
||||
}
|
||||
|
||||
return downsampledPileup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadBackedPileup getPileupForSample(String sampleName) {
|
||||
return pileupBySample.containsKey(sampleName) ? pileupBySample.get(sampleName) : null;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||
*
|
||||
* for (PileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Iterator<PileupElement> iterator() {
|
||||
return new MergingPileupElementIterator(pileupBySample);
|
||||
}
|
||||
|
||||
// todo -- why is this public?
|
||||
@Override
|
||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
||||
int i = 0;
|
||||
Iterator<PileupElement> iterator = new MergingPileupElementIterator(pileupBySample);
|
||||
while(iterator.hasNext()) {
|
||||
PileupElement pile = iterator.next();
|
||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
||||
}
|
||||
|
||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple useful routine to count the number of deletion bases in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfDeletions() {
|
||||
return nDeletions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumberOfMappingQualityZeroReads() {
|
||||
return nMQ0Reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
|
||||
* to BaseUtils.simpleBaseToBaseIndex for each base.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int[] getBaseCounts() {
|
||||
int[] counts = new int[4];
|
||||
for(ReadBackedPileup pileup: pileupBySample.values()) {
|
||||
int[] countsBySample = pileup.getBaseCounts();
|
||||
for(int i = 0; i < counts.length; i++)
|
||||
counts[i] += countsBySample[i];
|
||||
}
|
||||
return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean hasSecondaryBases() {
|
||||
boolean hasSecondaryBases = false;
|
||||
for(ReadBackedPileup pileup: pileupBySample.values())
|
||||
hasSecondaryBases |= pileup.hasSecondaryBases();
|
||||
return hasSecondaryBases;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPileupString(char ref) {
|
||||
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
|
||||
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
|
||||
return String.format("%s %s %c %s %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
ref, // reference base
|
||||
new String(getBases()),
|
||||
getQualsString());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getSecondaryBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
private String getQualsString() {
|
||||
return UnifiedReadBackedPileup.quals2String(getQuals());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,420 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Dec 29, 2009
|
||||
* Time: 12:25:55 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class UnifiedReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup {
|
||||
private GenomeLoc loc = null;
|
||||
private List<ExtendedEventPileupElement> pileup = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nInsertions = 0;
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc without any aligned reads
|
||||
*
|
||||
*/
|
||||
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc ) {
|
||||
this(loc, new ArrayList<ExtendedEventPileupElement>(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
|
||||
* pointer to pileup. Don't go changing the data in pileup.
|
||||
*
|
||||
*/
|
||||
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List<ExtendedEventPileupElement> pileup ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
|
||||
calculatedCachedData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List<ExtendedEventPileupElement> pileup, int size,
|
||||
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
this.size = size;
|
||||
this.maxDeletionLength = maxDeletionLength;
|
||||
this.nDeletions = nDeletions;
|
||||
this.nInsertions = nInsertions;
|
||||
this.nMQ0Reads = nMQ0Reads;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
* sizes, nDeletion, etc. over and over potentially.
|
||||
*/
|
||||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nInsertions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( ExtendedEventPileupElement p : this ) {
|
||||
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
|
||||
} else {
|
||||
if ( p.isInsertion() ) nInsertions++;
|
||||
}
|
||||
|
||||
if ( p.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Special 'constructors'
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
for ( ExtendedEventPileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() > 0 ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
|
||||
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
|
||||
for ( ExtendedEventPileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
|
||||
return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(pileup.size())) )
|
||||
i++;
|
||||
}
|
||||
|
||||
Iterator positionIter = positions.iterator();
|
||||
ArrayList<ExtendedEventPileupElement> downsampledPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
downsampledPileup.add(pileup.get(nextReadToKeep));
|
||||
}
|
||||
|
||||
return new UnifiedReadBackedExtendedEventPileup(getLocation(), downsampledPileup);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||
*
|
||||
* for (PileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Iterator<ExtendedEventPileupElement> iterator() {
|
||||
return pileup.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of deletion events in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfDeletions() {
|
||||
return nDeletions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of insertion events in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfInsertions() {
|
||||
return nInsertions;
|
||||
}
|
||||
|
||||
|
||||
/** Returns the length of the longest deletion observed at the site this
|
||||
* pileup is associated with (NOTE: by convention, both insertions and deletions
|
||||
* are associated with genomic location immediately before the actual event). If
|
||||
* there are no deletions at the site, returns 0.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getMaxDeletionLength() {
|
||||
return maxDeletionLength;
|
||||
}
|
||||
/**
|
||||
* Returns the number of mapping quality zero reads in this pileup.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfMappingQualityZeroReads() {
|
||||
return nMQ0Reads;
|
||||
}
|
||||
|
||||
// public int getNumberOfDeletions() {
|
||||
// int n = 0;
|
||||
//
|
||||
// for ( int i = 0; i < size(); i++ ) {
|
||||
// if ( getOffsets().get(i) != -1 ) { n++; }
|
||||
// }
|
||||
//
|
||||
// return n;
|
||||
// }
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getShortPileupString() {
|
||||
// In the pileup format, each extended event line has genomic position (chromosome name and offset),
|
||||
// reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing
|
||||
// insertion, deletion or no-event, respectively.
|
||||
return String.format("%s %s E %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
new String(getEvents()) );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getEvents() {
|
||||
byte[] v = new byte[size()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this ) {
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION: v[i] = 'I'; break;
|
||||
case DELETION: v[i] = 'D'; break;
|
||||
case NOEVENT: v[i] = '.'; break;
|
||||
default: throw new StingException("Unknown event type encountered: "+e.getType());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/** A shortcut for getEventStringsWithCounts(null);
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts() {
|
||||
return getEventStringsWithCounts(null);
|
||||
}
|
||||
|
||||
/** Returns String representation of all distinct extended events (indels) at the site along with
|
||||
* observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
|
||||
* deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
|
||||
* deleted sequence will be used in the string representation (e.g. "-AAC").
|
||||
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
|
||||
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
|
||||
* @return list of distinct events; first element of a pair is a string representation of the event, second element
|
||||
* gives the number of reads, in which that event was observed
|
||||
*/
|
||||
@Override
|
||||
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
|
||||
Map<String, Integer> events = new HashMap<String,Integer>();
|
||||
|
||||
for ( ExtendedEventPileupElement e : this ) {
|
||||
Integer cnt;
|
||||
String indel = null;
|
||||
switch ( e.getType() ) {
|
||||
case INSERTION:
|
||||
indel = "+"+e.getEventBases();
|
||||
break;
|
||||
case DELETION:
|
||||
indel = getDeletionString(e.getEventLength(),refBases);
|
||||
break;
|
||||
case NOEVENT: continue;
|
||||
default: throw new StingException("Unknown event type encountered: "+e.getType());
|
||||
}
|
||||
|
||||
cnt = events.get(indel);
|
||||
if ( cnt == null ) events.put(indel,1);
|
||||
else events.put(indel,cnt.intValue()+1);
|
||||
}
|
||||
|
||||
List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
|
||||
for ( Map.Entry<String,Integer> m : events.entrySet() ) {
|
||||
eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
|
||||
}
|
||||
return eventList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds string representation of the deletion event. If refBases is null, the representation will be
|
||||
* "<length>D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC")
|
||||
* will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted
|
||||
* base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the
|
||||
* deletion length (i.e. size of refBase must be at least length+1).
|
||||
* @param length
|
||||
* @param refBases
|
||||
* @return
|
||||
*/
|
||||
static String getDeletionString(int length, byte[] refBases) {
|
||||
if ( refBases == null ) {
|
||||
return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D"
|
||||
} else {
|
||||
return "-"+new String(refBases,1,length).toUpperCase();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this ) { v[i++] = (byte)e.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,550 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Version two file implementing pileups of bases in reads at a locus.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class UnifiedReadBackedPileup implements ReadBackedPileup {
|
||||
private GenomeLoc loc = null;
|
||||
private List<PileupElement> pileup = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This pileup will contain a list, in order of the reads, of the piled bases at
|
||||
* reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
|
||||
* go changing the reads.
|
||||
*
|
||||
* @param loc
|
||||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
public UnifiedReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
this(loc, readsOffsets2Pileup(reads, offsets));
|
||||
}
|
||||
|
||||
public UnifiedReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
|
||||
this(loc, readsOffsets2Pileup(reads, offset));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc without any aligned reads
|
||||
*
|
||||
*/
|
||||
public UnifiedReadBackedPileup(GenomeLoc loc ) {
|
||||
this(loc, new ArrayList<PileupElement>(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
|
||||
* pointer to pileup. Don't go changing the data in pileup.
|
||||
*
|
||||
*/
|
||||
public UnifiedReadBackedPileup(GenomeLoc loc, List<PileupElement> pileup ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
|
||||
calculatedCachedData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimization of above constructor where all of the cached data is provided
|
||||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public UnifiedReadBackedPileup(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
this.size = size;
|
||||
this.nDeletions = nDeletions;
|
||||
this.nMQ0Reads = nMQ0Reads;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
* sizes, nDeletion, etc. over and over potentially.
|
||||
*/
|
||||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
}
|
||||
if ( p.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Helper routine for converting reads and offset lists to a PileupElement list.
|
||||
*
|
||||
* @param reads
|
||||
* @param offsets
|
||||
* @return
|
||||
*/
|
||||
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if ( offsets == null ) throw new StingException("Illegal null offsets list in UnifiedReadBackedPileup");
|
||||
if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!");
|
||||
|
||||
ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) );
|
||||
}
|
||||
|
||||
return pileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine for converting reads and a single offset to a PileupElement list.
|
||||
*
|
||||
* @param reads
|
||||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
|
||||
if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if ( offset < 0 ) throw new StingException("Illegal offset < 0 UnifiedReadBackedPileup");
|
||||
|
||||
ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
pileup.add( new PileupElement( reads.get(i), offset ) );
|
||||
}
|
||||
|
||||
return pileup;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Special 'constructors'
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no deletions in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||
if ( getNumberOfDeletions() > 0 ) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( !p.isDeletion() ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return new UnifiedReadBackedPileup(loc, filteredPileup);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup where only one read from an overlapping read
|
||||
* pair is retained. If the two reads in question disagree to their basecall,
|
||||
* neither read is retained. If they agree on the base, the read with the higher
|
||||
* quality observation is retained
|
||||
*
|
||||
* @return the newly filtered pileup
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
|
||||
Map<String, PileupElement> filteredPileup = new HashMap<String, PileupElement>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
String readName = p.getRead().getReadName();
|
||||
|
||||
// if we've never seen this read before, life is good
|
||||
if (!filteredPileup.containsKey(readName)) {
|
||||
filteredPileup.put(readName, p);
|
||||
} else {
|
||||
PileupElement existing = filteredPileup.get(readName);
|
||||
|
||||
// if the reads disagree at this position, throw them both out. Otherwise
|
||||
// keep the element with the higher quality score
|
||||
if (existing.getBase() != p.getBase()) {
|
||||
filteredPileup.remove(readName);
|
||||
} else {
|
||||
if (existing.getQual() < p.getQual()) {
|
||||
filteredPileup.put(readName, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new UnifiedReadBackedPileup(loc, new ArrayList<PileupElement>(filteredPileup.values()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() > 0 ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return new UnifiedReadBackedPileup(loc, filteredPileup);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minBaseQ
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
|
||||
return new UnifiedReadBackedPileup(loc, filteredPileup);
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minBaseQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) {
|
||||
return getBaseAndMappingFilteredPileup(minBaseQ, -1);
|
||||
}
|
||||
|
||||
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
|
||||
* This method allocates and returns a new instance of ReadBackedPileup.
|
||||
* @param minMapQ
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) {
|
||||
return getBaseAndMappingFilteredPileup(-1, minMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(pileup.size())) )
|
||||
i++;
|
||||
}
|
||||
|
||||
Iterator positionIter = positions.iterator();
|
||||
ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();
|
||||
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
downsampledPileup.add(pileup.get(nextReadToKeep));
|
||||
}
|
||||
|
||||
return new UnifiedReadBackedPileup(getLocation(), downsampledPileup);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReadBackedPileup getPileupForSample(String sampleName) {
|
||||
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
for(PileupElement p: this) {
|
||||
SAMRecord read = p.getRead();
|
||||
if(sampleName != null) {
|
||||
if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
else {
|
||||
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
}
|
||||
return filteredPileup.size()>0 ? new UnifiedReadBackedPileup(loc,filteredPileup) : null;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||
*
|
||||
* for (PileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Iterator<PileupElement> iterator() {
|
||||
return pileup.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care not only about bases and quals in the pileup
|
||||
* but also need access to the index of the pileup element in the pile.
|
||||
*
|
||||
* for (ExtendedPileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
|
||||
// todo -- reimplement efficiently
|
||||
// todo -- why is this public?
|
||||
@Override
|
||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
||||
int i = 0;
|
||||
for ( PileupElement pile : this ) {
|
||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
||||
}
|
||||
|
||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple useful routine to count the number of deletion bases in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int getNumberOfDeletions() {
|
||||
return nDeletions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumberOfMappingQualityZeroReads() {
|
||||
return nMQ0Reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
|
||||
* to BaseUtils.simpleBaseToBaseIndex for each base.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int[] getBaseCounts() {
|
||||
int[] counts = new int[4];
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() ) {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||
if (index != -1)
|
||||
counts[index]++;
|
||||
}
|
||||
}
|
||||
|
||||
return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean hasSecondaryBases() {
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPileupString(char ref) {
|
||||
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
|
||||
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
|
||||
return String.format("%s %s %c %s %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
ref, // reference base
|
||||
new String(getBases()),
|
||||
getQualsString());
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getSecondaryBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an array of the mapping qualities
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Private functions for printing pileups
|
||||
//
|
||||
private String getMappingQualsString() {
|
||||
return quals2String(getMappingQuals());
|
||||
}
|
||||
|
||||
static String quals2String( byte[] quals ) {
|
||||
StringBuilder qualStr = new StringBuilder();
|
||||
for ( int qual : quals ) {
|
||||
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
||||
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
||||
qualStr.append(qualChar);
|
||||
}
|
||||
|
||||
return qualStr.toString();
|
||||
}
|
||||
|
||||
private String getQualsString() {
|
||||
return quals2String(getQuals());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder();
|
||||
s.append(getLocation());
|
||||
s.append(": ");
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
s.append((char)p.getBase());
|
||||
}
|
||||
|
||||
return s.toString();
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue