Switching over DownsamplingLocusIteratorByState -> LocusIteratorByState. Some operations
will not be as fast as they could be because the workflow is currently merge sam records (sharding) -> split sam records (LocusIteratorByState) -> merge records (LocusIteraotorByState) -> split records (StratifiedAlignmentContext), but this will be fixed when StratifiedAlignmentContext is updated to take advantage of the new functionality in ReadBackedPileup. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3599 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1d50fc7087
commit
c806ffba5f
|
|
@ -68,13 +68,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
||||||
|
|
||||||
LocusIterator locusIterator;
|
LocusIterator locusIterator;
|
||||||
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
|
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
|
||||||
if(sourceInfo.getDownsamplingMethod() != null &&
|
locusIterator = new LocusIteratorByState(wrappedIterator,sourceInfo,discards);
|
||||||
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)) {
|
|
||||||
if ( discards.size() > 0 )
|
|
||||||
throw new StingException("Experimental downsampling iterator doesn't support base discarding at this point; complain to Matt Hanna");
|
|
||||||
locusIterator = new DownsamplingLocusIteratorByState(wrappedIterator,sourceInfo);
|
|
||||||
} else
|
|
||||||
locusIterator = new LocusIteratorByState(wrappedIterator,sourceInfo, discards);
|
|
||||||
|
|
||||||
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
|
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -26,13 +26,14 @@
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||||
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -47,7 +48,6 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
//
|
//
|
||||||
//public enum Discard { ADAPTOR_BASES }
|
//public enum Discard { ADAPTOR_BASES }
|
||||||
//public static final EnumSet<Discard> NO_DISCARDS = EnumSet.noneOf(Discard.class);
|
//public static final EnumSet<Discard> NO_DISCARDS = EnumSet.noneOf(Discard.class);
|
||||||
|
|
||||||
public static final List<LocusIteratorFilter> NO_FILTERS = Arrays.asList();
|
public static final List<LocusIteratorFilter> NO_FILTERS = Arrays.asList();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -64,9 +64,11 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// member fields
|
// member fields
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
private final PushbackIterator<SAMRecord> it;
|
|
||||||
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
||||||
|
|
||||||
|
private final Collection<String> sampleNames = new ArrayList<String>();
|
||||||
|
private final ReadStateManager readStates;
|
||||||
|
|
||||||
private class SAMRecordState {
|
private class SAMRecordState {
|
||||||
SAMRecord read;
|
SAMRecord read;
|
||||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||||
|
|
@ -256,7 +258,6 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private LinkedList<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
|
||||||
//final boolean DEBUG = false;
|
//final boolean DEBUG = false;
|
||||||
//final boolean DEBUG2 = false && DEBUG;
|
//final boolean DEBUG2 = false && DEBUG;
|
||||||
private Reads readInfo;
|
private Reads readInfo;
|
||||||
|
|
@ -273,7 +274,14 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation, List<LocusIteratorFilter> filters ) {
|
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, Reads readInformation, List<LocusIteratorFilter> filters ) {
|
||||||
this.it = new PushbackIterator<SAMRecord>(samIterator);
|
// Aggregate all sample names.
|
||||||
|
// TODO: Push in header via constructor
|
||||||
|
if(GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getDataSource() != null) {
|
||||||
|
sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader()));
|
||||||
|
}
|
||||||
|
// Add a null sample name as a catch-all for reads without samples
|
||||||
|
if(!sampleNames.contains(null)) sampleNames.add(null);
|
||||||
|
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),readInformation.getMaxReadsAtLocus(),sampleNames);
|
||||||
this.readInfo = readInformation;
|
this.readInfo = readInformation;
|
||||||
this.filters = filters;
|
this.filters = filters;
|
||||||
overflowTracker = new LocusOverflowTracker(readInformation.getMaxReadsAtLocus());
|
overflowTracker = new LocusOverflowTracker(readInformation.getMaxReadsAtLocus());
|
||||||
|
|
@ -293,24 +301,25 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
||||||
|
|
||||||
// if we don't have a next record, make sure we clean the warning queue
|
// if we don't have a next record, make sure we clean the warning queue
|
||||||
|
// TODO: Note that this implementation requires that hasNext() always be called before next().
|
||||||
if (!r) overflowTracker.cleanWarningQueue();
|
if (!r) overflowTracker.cleanWarningQueue();
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printState() {
|
public void printState() {
|
||||||
for ( SAMRecordState state : readStates ) {
|
for(String sampleName: sampleNames) {
|
||||||
logger.debug(String.format("printState():"));
|
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
|
||||||
SAMRecord read = state.getRead();
|
while(iterator.hasNext()) {
|
||||||
int offset = state.getReadOffset();
|
SAMRecordState state = iterator.next();
|
||||||
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
|
logger.debug(String.format("printState():"));
|
||||||
|
SAMRecord read = state.getRead();
|
||||||
|
int offset = state.getReadOffset();
|
||||||
|
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
logger.debug(String.format(("clear() called")));
|
|
||||||
readStates.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
private GenomeLoc getLocation() {
|
private GenomeLoc getLocation() {
|
||||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation();
|
return readStates.isEmpty() ? null : readStates.getFirst().getLocation();
|
||||||
}
|
}
|
||||||
|
|
@ -334,9 +343,9 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
|
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
|
||||||
*/
|
*/
|
||||||
private void lazyLoadNextAlignmentContext() {
|
private void lazyLoadNextAlignmentContext() {
|
||||||
while(nextAlignmentContext == null && (!readStates.isEmpty() || it.hasNext())) {
|
while(nextAlignmentContext == null && readStates.hasNext()) {
|
||||||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
readStates.collectPendingReads();
|
||||||
|
|
||||||
int size = 0;
|
int size = 0;
|
||||||
int nDeletions = 0;
|
int nDeletions = 0;
|
||||||
|
|
@ -350,144 +359,113 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||||
// and shift the position.
|
// and shift the position.
|
||||||
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||||
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
|
Map<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>> fullExtendedEventPileup =
|
||||||
|
new HashMap<String,AbstractReadBackedPileup<ReadBackedExtendedEventPileup,ExtendedEventPileupElement>>();
|
||||||
|
|
||||||
int maxDeletionLength = 0;
|
|
||||||
|
|
||||||
for ( SAMRecordState state : readStates ) {
|
|
||||||
if ( state.hadIndel() ) {
|
|
||||||
size++;
|
|
||||||
if ( state.getEventBases() == null ) {
|
|
||||||
nDeletions++;
|
|
||||||
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
|
||||||
}
|
|
||||||
else nInsertions++;
|
|
||||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
|
||||||
state.getReadEventStartOffset(),
|
|
||||||
state.getEventLength(),
|
|
||||||
state.getEventBases()) );
|
|
||||||
|
|
||||||
} else {
|
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
|
|
||||||
// this read has no indel associated with the previous position on the ref;
|
|
||||||
// we count this read in only if it has actual bases, not N span...
|
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
|
|
||||||
|
|
||||||
// if cigar operator is D but the read has no extended event reported (that's why we ended
|
|
||||||
// up in this branch), it means that we are currently inside a deletion that started earlier;
|
|
||||||
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
|
||||||
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
|
||||||
size++;
|
|
||||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
|
||||||
state.getReadOffset()-1,
|
|
||||||
-1) // length=-1 --> noevent
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
|
||||||
nMQ0Reads++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
|
||||||
SAMRecordState our1stState = readStates.getFirst();
|
SAMRecordState our1stState = readStates.getFirst();
|
||||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||||
// are associated with the *previous* reference base
|
// are associated with the *previous* reference base
|
||||||
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
|
||||||
|
|
||||||
|
for(String sampleName: sampleNames) {
|
||||||
|
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
|
||||||
|
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
|
||||||
|
|
||||||
|
size = 0;
|
||||||
|
nDeletions = 0;
|
||||||
|
nInsertions = 0;
|
||||||
|
nMQ0Reads = 0;
|
||||||
|
int maxDeletionLength = 0;
|
||||||
|
|
||||||
|
while(iterator.hasNext()) {
|
||||||
|
SAMRecordState state = iterator.next();
|
||||||
|
if ( state.hadIndel() ) {
|
||||||
|
size++;
|
||||||
|
if ( state.getEventBases() == null ) {
|
||||||
|
nDeletions++;
|
||||||
|
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
||||||
|
}
|
||||||
|
else nInsertions++;
|
||||||
|
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||||
|
state.getReadEventStartOffset(),
|
||||||
|
state.getEventLength(),
|
||||||
|
state.getEventBases()) );
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
// this read has no indel associated with the previous position on the ref;
|
||||||
|
// we count this read in only if it has actual bases, not N span...
|
||||||
|
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
|
||||||
|
|
||||||
|
// if cigar operator is D but the read has no extended event reported (that's why we ended
|
||||||
|
// up in this branch), it means that we are currently inside a deletion that started earlier;
|
||||||
|
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
||||||
|
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
||||||
|
size++;
|
||||||
|
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||||
|
state.getReadOffset()-1,
|
||||||
|
-1) // length=-1 --> noevent
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||||
|
nMQ0Reads++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
|
||||||
|
}
|
||||||
|
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
||||||
// System.out.println("Indel(s) at "+loc);
|
// System.out.println("Indel(s) at "+loc);
|
||||||
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
|
||||||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup));
|
||||||
} else {
|
} else {
|
||||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
GenomeLoc location = getLocation();
|
||||||
|
Map<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>> fullPileup = new HashMap<String,AbstractReadBackedPileup<ReadBackedPileup,PileupElement>>();
|
||||||
|
|
||||||
// todo -- performance problem -- should be lazy, really
|
// todo -- performance problem -- should be lazy, really
|
||||||
for ( SAMRecordState state : readStates ) {
|
for(String sampleName: sampleNames) {
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
|
||||||
if ( filterRead(state.getRead(), getLocation().getStart(), filters ) ) {
|
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||||
discarded_bases++;
|
|
||||||
//printStatus("Adaptor bases", discarded_adaptor_bases);
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
observed_bases++;
|
|
||||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
|
||||||
size++;
|
|
||||||
}
|
|
||||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
|
||||||
size++;
|
|
||||||
pile.add(new PileupElement(state.getRead(), -1));
|
|
||||||
nDeletions++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo -- this looks like a bug w.r.t. including reads with deletion at loci -- MAD 05/27/10
|
size = 0;
|
||||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
nDeletions = 0;
|
||||||
nMQ0Reads++;
|
nInsertions = 0;
|
||||||
|
nMQ0Reads = 0;
|
||||||
|
|
||||||
|
while(iterator.hasNext()) {
|
||||||
|
SAMRecordState state = iterator.next();
|
||||||
|
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
if ( filterRead(state.getRead(), location.getStart(), filters ) ) {
|
||||||
|
discarded_bases++;
|
||||||
|
//printStatus("Adaptor bases", discarded_adaptor_bases);
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
observed_bases++;
|
||||||
|
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||||
|
size++;
|
||||||
|
}
|
||||||
|
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
size++;
|
||||||
|
pile.add(new PileupElement(state.getRead(), -1));
|
||||||
|
nDeletions++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo -- this looks like a bug w.r.t. including reads with deletion at loci -- MAD 05/27/10
|
||||||
|
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||||
|
nMQ0Reads++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads));
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc loc = getLocation();
|
|
||||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||||
// if we got reads with non-D/N over the current position, we are done
|
// if we got reads with non-D/N over the current position, we are done
|
||||||
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new ReadBackedPileupImpl(loc, pile, size, nDeletions, nMQ0Reads));
|
if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location,fullPileup));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean filterRead(SAMRecord rec, long pos, List<LocusIteratorFilter> filters) {
|
|
||||||
for ( LocusIteratorFilter filter : filters ) {
|
|
||||||
if ( filter.filterOut(rec, pos) ) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void printStatus(final String title, long n) {
|
|
||||||
if ( n % 10000 == 0 )
|
|
||||||
System.out.printf("%s %d / %d = %.2f%n", title, n, observed_bases, 100.0 * n / (observed_bases + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private void collectPendingReads(int maxReads) {
|
|
||||||
//if (DEBUG) {
|
|
||||||
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));
|
|
||||||
// printState();
|
|
||||||
//}
|
|
||||||
GenomeLoc location = null;
|
|
||||||
|
|
||||||
// the farthest right a read extends
|
|
||||||
Integer rightMostEnd = -1;
|
|
||||||
|
|
||||||
int curSize = readStates.size(); // simple performance improvement -- avoids unnecessary size() operation
|
|
||||||
while (it.hasNext()) {
|
|
||||||
SAMRecord read = it.next();
|
|
||||||
if (readIsPastCurrentPosition(read)) {
|
|
||||||
// We've collected up all the reads that span this region
|
|
||||||
it.pushback(read);
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
if (curSize < maxReads) {
|
|
||||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
|
||||||
state.stepForwardOnGenome();
|
|
||||||
readStates.add(state);
|
|
||||||
curSize++;
|
|
||||||
if (state.hadIndel()) hasExtendedEvents = true;
|
|
||||||
//if (DEBUG) logger.debug(String.format(" ... added read %s", read.getReadName()));
|
|
||||||
} else {
|
|
||||||
if (location == null)
|
|
||||||
location = GenomeLocParser.createGenomeLoc(read);
|
|
||||||
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (location != null)
|
|
||||||
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
|
||||||
curSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
// fast testing of position
|
// fast testing of position
|
||||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||||
if ( readStates.isEmpty() )
|
if ( readStates.isEmpty() )
|
||||||
|
|
@ -506,20 +484,35 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean filterRead(SAMRecord rec, long pos, List<LocusIteratorFilter> filters) {
|
||||||
|
for ( LocusIteratorFilter filter : filters ) {
|
||||||
|
if ( filter.filterOut(rec, pos) ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printStatus(final String title, long n) {
|
||||||
|
if ( n % 10000 == 0 )
|
||||||
|
System.out.printf("%s %d / %d = %.2f%n", title, n, observed_bases, 100.0 * n / (observed_bases + 1));
|
||||||
|
}
|
||||||
|
|
||||||
private void updateReadStates() {
|
private void updateReadStates() {
|
||||||
Iterator<SAMRecordState> it = readStates.iterator();
|
for(String sampleName: sampleNames) {
|
||||||
while ( it.hasNext() ) {
|
Iterator<SAMRecordState> it = readStates.iteratorForSample(sampleName);
|
||||||
SAMRecordState state = it.next();
|
while ( it.hasNext() ) {
|
||||||
CigarOperator op = state.stepForwardOnGenome();
|
SAMRecordState state = it.next();
|
||||||
if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true;
|
CigarOperator op = state.stepForwardOnGenome();
|
||||||
else {
|
if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true;
|
||||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
else {
|
||||||
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
||||||
if ( op == null ) { // we've stepped off the end of the object
|
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||||
//if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart()));
|
if ( op == null ) { // we've stepped off the end of the object
|
||||||
it.remove();
|
//if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart()));
|
||||||
|
it.remove();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -544,5 +537,561 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
public LocusOverflowTracker getLocusOverflowTracker() {
|
public LocusOverflowTracker getLocusOverflowTracker() {
|
||||||
return this.overflowTracker;
|
return this.overflowTracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private class ReadStateManager {
|
||||||
|
private final PeekableIterator<SAMRecord> iterator;
|
||||||
|
private final DownsamplingMethod downsamplingMethod;
|
||||||
|
|
||||||
|
private final ReadSelector chainedReadSelector;
|
||||||
|
private final SamplePartitioner samplePartitioner;
|
||||||
|
|
||||||
|
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
|
||||||
|
|
||||||
|
private final int targetCoverage;
|
||||||
|
private final int maxReadsAtLocus;
|
||||||
|
|
||||||
|
private int totalReadStates = 0;
|
||||||
|
|
||||||
|
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, int maxReadsAtLocus, Collection<String> sampleNames) {
|
||||||
|
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||||
|
this.downsamplingMethod = downsamplingMethod;
|
||||||
|
switch(downsamplingMethod.type) {
|
||||||
|
case EXPERIMENTAL_BY_SAMPLE:
|
||||||
|
if(downsamplingMethod.toCoverage == null)
|
||||||
|
throw new StingException("Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
||||||
|
this.targetCoverage = downsamplingMethod.toCoverage;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
this.targetCoverage = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
this.maxReadsAtLocus = maxReadsAtLocus;
|
||||||
|
|
||||||
|
samplePartitioner = new SamplePartitioner(sampleNames);
|
||||||
|
for(String sampleName: sampleNames)
|
||||||
|
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
|
||||||
|
|
||||||
|
ReadSelector primaryReadSelector= samplePartitioner;
|
||||||
|
|
||||||
|
chainedReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
|
||||||
|
return new Iterator<SAMRecordState>() {
|
||||||
|
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sampleName).iterator();
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return wrappedIterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecordState next() {
|
||||||
|
return wrappedIterator.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
wrappedIterator.remove();
|
||||||
|
totalReadStates--;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return totalReadStates == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return totalReadStates;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecordState getFirst() {
|
||||||
|
for(String sampleName: sampleNames) {
|
||||||
|
PerSampleReadStateManager reads = readStatesBySample.get(sampleName);
|
||||||
|
if(!reads.isEmpty())
|
||||||
|
return reads.peek();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return totalReadStates > 0 || iterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void collectPendingReads() {
|
||||||
|
if(!iterator.hasNext())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if(readStates.size() == 0) {
|
||||||
|
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||||
|
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||||
|
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||||
|
chainedReadSelector.submitRead(iterator.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Fast fail in the case that the read is past the current position.
|
||||||
|
if(readIsPastCurrentPosition(iterator.peek()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||||
|
chainedReadSelector.submitRead(iterator.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chainedReadSelector.complete();
|
||||||
|
|
||||||
|
for(String sampleName: sampleNames) {
|
||||||
|
ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName);
|
||||||
|
|
||||||
|
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||||
|
|
||||||
|
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
|
||||||
|
int numReads = statesBySample.size();
|
||||||
|
|
||||||
|
if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) {
|
||||||
|
long readLimit = aggregator.getNumReadsSeen();
|
||||||
|
boolean mrlViolation = false;
|
||||||
|
if(readLimit > maxReadsAtLocus-totalReadStates) {
|
||||||
|
readLimit = maxReadsAtLocus-totalReadStates;
|
||||||
|
mrlViolation = true;
|
||||||
|
}
|
||||||
|
addReadsToSample(statesBySample,newReads,readLimit,mrlViolation);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int[] counts = statesBySample.getCountsPerAlignmentStart();
|
||||||
|
int[] updatedCounts = new int[counts.length];
|
||||||
|
System.arraycopy(counts,0,updatedCounts,0,counts.length);
|
||||||
|
|
||||||
|
boolean readPruned = true;
|
||||||
|
while(numReads+newReads.size()>targetCoverage && readPruned) {
|
||||||
|
readPruned = false;
|
||||||
|
for(int alignmentStart=updatedCounts.length-1;numReads+newReads.size()>targetCoverage&&alignmentStart>=0;alignmentStart--) {
|
||||||
|
if(updatedCounts[alignmentStart] > 1) {
|
||||||
|
updatedCounts[alignmentStart]--;
|
||||||
|
numReads--;
|
||||||
|
readPruned = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(numReads == targetCoverage) {
|
||||||
|
updatedCounts[0]--;
|
||||||
|
numReads--;
|
||||||
|
}
|
||||||
|
|
||||||
|
BitSet toPurge = new BitSet(readStates.size());
|
||||||
|
int readOffset = 0;
|
||||||
|
|
||||||
|
for(int i = 0; i < updatedCounts.length; i++) {
|
||||||
|
int n = counts[i];
|
||||||
|
int k = updatedCounts[i];
|
||||||
|
|
||||||
|
for(Integer purgedElement: MathUtils.sampleIndicesWithoutReplacement(n,n-k))
|
||||||
|
toPurge.set(readOffset+purgedElement);
|
||||||
|
|
||||||
|
readOffset += counts[i];
|
||||||
|
}
|
||||||
|
statesBySample.purge(toPurge);
|
||||||
|
addReadsToSample(statesBySample,newReads,targetCoverage-numReads,false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chainedReadSelector.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add reads with the given sample name to the given hanger entry.
|
||||||
|
* @param readStates The list of read states to add this collection of reads.
|
||||||
|
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||||
|
* @param maxReads Maximum number of reads to add.
|
||||||
|
*/
|
||||||
|
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads, boolean atMaxReadsAtLocusLimit) {
|
||||||
|
if(reads.isEmpty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
GenomeLoc location = null;
|
||||||
|
|
||||||
|
// the farthest right a read extends
|
||||||
|
Integer rightMostEnd = -1;
|
||||||
|
|
||||||
|
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
|
||||||
|
int readCount = 0;
|
||||||
|
for(SAMRecord read: reads) {
|
||||||
|
if(readCount < maxReads) {
|
||||||
|
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
||||||
|
state.stepForwardOnGenome();
|
||||||
|
newReadStates.add(state);
|
||||||
|
// TODO: What if we downsample the extended events away?
|
||||||
|
if (state.hadIndel()) hasExtendedEvents = true;
|
||||||
|
readCount++;
|
||||||
|
}
|
||||||
|
if(atMaxReadsAtLocusLimit) {
|
||||||
|
if (location == null)
|
||||||
|
location = GenomeLocParser.createGenomeLoc(read);
|
||||||
|
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||||
|
|
||||||
|
if (location != null)
|
||||||
|
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
|
||||||
|
totalReadStates);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||||
|
private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
||||||
|
private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
|
||||||
|
|
||||||
|
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
|
||||||
|
readStates.addAll(states);
|
||||||
|
readStateCounter.add(new Counter(states.size()));
|
||||||
|
totalReadStates += states.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return readStates.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecordState peek() {
|
||||||
|
return readStates.peek();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return readStates.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[] getCountsPerAlignmentStart() {
|
||||||
|
int[] counts = new int[readStateCounter.size()];
|
||||||
|
int index = 0;
|
||||||
|
for(Counter counter: readStateCounter)
|
||||||
|
counts[index++] = counter.getCount();
|
||||||
|
return counts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<SAMRecordState> iterator() {
|
||||||
|
return new Iterator<SAMRecordState>() {
|
||||||
|
private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return wrappedIterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecordState next() {
|
||||||
|
return wrappedIterator.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
wrappedIterator.remove();
|
||||||
|
Counter counter = readStateCounter.peek();
|
||||||
|
counter.decrement();
|
||||||
|
if(counter.getCount() == 0)
|
||||||
|
readStateCounter.remove();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Purge the given elements from the bitset. If an element in the bitset is true, purge
|
||||||
|
* the corresponding read state.
|
||||||
|
* @param elements bits from the set to purge.
|
||||||
|
*/
|
||||||
|
public void purge(final BitSet elements) {
|
||||||
|
if(elements.isEmpty() || readStates.isEmpty()) return;
|
||||||
|
|
||||||
|
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
|
||||||
|
|
||||||
|
Iterator<Counter> counterIterator = readStateCounter.iterator();
|
||||||
|
Counter currentCounter = counterIterator.next();
|
||||||
|
|
||||||
|
int readIndex = 0;
|
||||||
|
long alignmentStartCounter = currentCounter.getCount();
|
||||||
|
|
||||||
|
int toPurge = elements.nextSetBit(0);
|
||||||
|
int removedCount = 0;
|
||||||
|
|
||||||
|
while(readStateIterator.hasNext() && toPurge >= 0) {
|
||||||
|
readStateIterator.next();
|
||||||
|
|
||||||
|
if(readIndex == toPurge) {
|
||||||
|
readStateIterator.remove();
|
||||||
|
currentCounter.decrement();
|
||||||
|
if(currentCounter.getCount() == 0)
|
||||||
|
counterIterator.remove();
|
||||||
|
removedCount++;
|
||||||
|
toPurge = elements.nextSetBit(toPurge+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
readIndex++;
|
||||||
|
alignmentStartCounter--;
|
||||||
|
if(alignmentStartCounter == 0 && counterIterator.hasNext()) {
|
||||||
|
currentCounter = counterIterator.next();
|
||||||
|
alignmentStartCounter = currentCounter.getCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
totalReadStates -= removedCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note: assuming that, whenever we downsample, we downsample to an integer capacity.
|
||||||
|
*/
|
||||||
|
private class Counter {
|
||||||
|
private int count;
|
||||||
|
|
||||||
|
public Counter(int count) {
|
||||||
|
this.count = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getCount() {
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void decrement() {
|
||||||
|
count--;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Selects reads passed to it based on a criteria decided through inheritance.
|
||||||
|
* TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option. Get rid of this.
|
||||||
|
*/
|
||||||
|
interface ReadSelector {
|
||||||
|
/**
|
||||||
|
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
|
||||||
|
* @param read the read to evaluate.
|
||||||
|
*/
|
||||||
|
public void submitRead(SAMRecord read);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
|
||||||
|
* @param read the read previously rejected.
|
||||||
|
*/
|
||||||
|
public void notifyReadRejected(SAMRecord read);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Signal the selector that read additions are complete.
|
||||||
|
*/
|
||||||
|
public void complete();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the number of reads seen by this selector so far.
|
||||||
|
* @return number of reads seen.
|
||||||
|
*/
|
||||||
|
public long getNumReadsSeen();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the number of reads accepted by this selector so far.
|
||||||
|
* @return number of reads selected.
|
||||||
|
*/
|
||||||
|
public long getNumReadsSelected();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the reads selected by this selector.
|
||||||
|
* @return collection of reads selected by this selector.
|
||||||
|
*/
|
||||||
|
public Collection<SAMRecord> getSelectedReads();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset this collection to its pre-gathered state.
|
||||||
|
*/
|
||||||
|
public void reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Choose the first N reads from the submitted set.
|
||||||
|
*/
|
||||||
|
class FirstNReadSelector implements ReadSelector {
|
||||||
|
private final ReadSelector chainedSelector;
|
||||||
|
|
||||||
|
private final Collection<SAMRecord> selectedReads = new LinkedList<SAMRecord>();
|
||||||
|
private final long readLimit;
|
||||||
|
private long readsSeen = 0;
|
||||||
|
|
||||||
|
public FirstNReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||||
|
this.chainedSelector = chainedSelector;
|
||||||
|
this.readLimit = readLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void submitRead(SAMRecord read) {
|
||||||
|
if(readsSeen < readLimit) {
|
||||||
|
selectedReads.add(read);
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.submitRead(read);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.notifyReadRejected(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void notifyReadRejected(SAMRecord read) {
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.notifyReadRejected(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void complete() {
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.complete();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSeen() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSelected() {
|
||||||
|
return selectedReads.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<SAMRecord> getSelectedReads() {
|
||||||
|
return selectedReads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
selectedReads.clear();
|
||||||
|
readsSeen = 0;
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Select N reads randomly from the input stream.
|
||||||
|
*/
|
||||||
|
class NRandomReadSelector implements ReadSelector {
|
||||||
|
private final ReservoirDownsampler<SAMRecord> reservoir;
|
||||||
|
private final ReadSelector chainedSelector;
|
||||||
|
private long readsSeen = 0;
|
||||||
|
|
||||||
|
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||||
|
this.reservoir = new ReservoirDownsampler<SAMRecord>((int)readLimit);
|
||||||
|
this.chainedSelector = chainedSelector;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void submitRead(SAMRecord read) {
|
||||||
|
SAMRecord displaced = reservoir.add(read);
|
||||||
|
if(displaced != null && chainedSelector != null)
|
||||||
|
chainedSelector.notifyReadRejected(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void notifyReadRejected(SAMRecord read) {
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void complete() {
|
||||||
|
for(SAMRecord read: reservoir.getDownsampledContents())
|
||||||
|
chainedSelector.submitRead(read);
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.complete();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public long getNumReadsSeen() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSelected() {
|
||||||
|
return reservoir.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<SAMRecord> getSelectedReads() {
|
||||||
|
return reservoir.getDownsampledContents();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
reservoir.clear();
|
||||||
|
if(chainedSelector != null)
|
||||||
|
chainedSelector.reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SamplePartitioner implements ReadSelector {
|
||||||
|
private final Map<String,SampleStorage> readsBySample;
|
||||||
|
private long readsSeen = 0;
|
||||||
|
|
||||||
|
public SamplePartitioner(Collection<String> sampleNames) {
|
||||||
|
readsBySample = new HashMap<String,SampleStorage>();
|
||||||
|
for(String sampleName: sampleNames)
|
||||||
|
readsBySample.put(sampleName,new SampleStorage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void submitRead(SAMRecord read) {
|
||||||
|
String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null;
|
||||||
|
if(readsBySample.containsKey(sampleName))
|
||||||
|
readsBySample.get(sampleName).submitRead(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void notifyReadRejected(SAMRecord read) {
|
||||||
|
String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null;
|
||||||
|
if(readsBySample.containsKey(sampleName))
|
||||||
|
readsBySample.get(sampleName).notifyReadRejected(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void complete() {
|
||||||
|
// NO-OP.
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSeen() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSelected() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<SAMRecord> getSelectedReads() {
|
||||||
|
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReadSelector getSelectedReads(String sampleName) {
|
||||||
|
if(!readsBySample.containsKey(sampleName))
|
||||||
|
throw new NoSuchElementException("Sample name not found");
|
||||||
|
return readsBySample.get(sampleName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
for(SampleStorage storage: readsBySample.values())
|
||||||
|
storage.reset();
|
||||||
|
readsSeen = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SampleStorage implements ReadSelector {
|
||||||
|
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||||
|
private long readsSeen = 0;
|
||||||
|
|
||||||
|
public void submitRead(SAMRecord read) {
|
||||||
|
reads.add(read);
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void notifyReadRejected(SAMRecord read) {
|
||||||
|
readsSeen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void complete() {
|
||||||
|
// NO-OP.
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSeen() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumReadsSelected() {
|
||||||
|
return readsSeen;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<SAMRecord> getSelectedReads() {
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
reads.clear();
|
||||||
|
readsSeen = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||||
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
||||||
+ "-R " + seqLocation + "references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"
|
+ "-R " + seqLocation + "references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"
|
||||||
+ " -L chr15:46,347,148 -o %s";
|
+ " -L chr15:46,347,148 -o %s";
|
||||||
String expected_md5 = "d23032d10111755ccb1c1b01e6e097a7";
|
String expected_md5 = "052187dd2bf2516a027578c8775856a8";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5));
|
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5));
|
||||||
executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec);
|
executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue