Delete old LocusIteratorByState, leaving only new LIBS and legacy
This commit is contained in:
parent
bd03511e35
commit
cc0c1b752a
|
|
@ -113,6 +113,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
|
||||
libs = ! sourceInfo.getDownsamplingMethod().useLegacyDownsampler ? new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames) : null;
|
||||
this.sourceIterator = sourceInfo.getDownsamplingMethod().useLegacyDownsampler
|
||||
// TODO -- remove me when we collapse legacy engine fork
|
||||
? new PeekableIterator<AlignmentContext>(new LegacyLocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames))
|
||||
: new PeekableIterator<AlignmentContext>(libs);
|
||||
|
||||
|
|
@ -120,7 +121,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
}
|
||||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
|
||||
this(shard, genomeLocParser, iterator, intervals, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
public Iterator<WindowMakerIterator> iterator() {
|
||||
|
|
|
|||
|
|
@ -403,4 +403,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
throw new IllegalStateException("Tried to create a pileup for read " + read + " with offset " + offset +
|
||||
" but we never saw such an offset in the alignment state machine");
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
public static List<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,326 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator.old;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIterator;
|
||||
import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||
*/
|
||||
public class LocusIteratorByState extends LocusIterator {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// member fields
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Used to create new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
private final boolean includeReadsWithDeletionAtLoci;
|
||||
|
||||
private AlignmentContext nextAlignmentContext;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors and other basic operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
final ReadProperties readInformation,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final Collection<String> samples) {
|
||||
this(samIterator,
|
||||
toDownsamplingInfo(readInformation),
|
||||
readInformation.includeReadsWithDeletionAtLoci(),
|
||||
genomeLocParser,
|
||||
samples,
|
||||
readInformation.keepUniqueReadListInLIBS());
|
||||
}
|
||||
|
||||
protected LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
final LIBSDownsamplingInfo downsamplingInfo,
|
||||
final boolean includeReadsWithDeletionAtLoci,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final Collection<String> samples,
|
||||
final boolean maintainUniqueReadsList ) {
|
||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList);
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
if (this.samples.isEmpty() && samIterator.hasNext()) {
|
||||
throw new IllegalArgumentException("samples list must not be empty");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<AlignmentContext> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
lazyLoadNextAlignmentContext();
|
||||
return nextAlignmentContext != null;
|
||||
}
|
||||
|
||||
private GenomeLoc getLocation() {
|
||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// next() routine and associated collection operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public AlignmentContext next() {
|
||||
lazyLoadNextAlignmentContext();
|
||||
if (!hasNext())
|
||||
throw new NoSuchElementException("LocusIteratorByState: out of elements.");
|
||||
AlignmentContext currentAlignmentContext = nextAlignmentContext;
|
||||
nextAlignmentContext = null;
|
||||
return currentAlignmentContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the next alignment context from the given state. Note that this is implemented as a lazy load method.
|
||||
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
|
||||
*/
|
||||
private void lazyLoadNextAlignmentContext() {
|
||||
while (nextAlignmentContext == null && readStates.hasNext()) {
|
||||
readStates.collectPendingReads();
|
||||
|
||||
final GenomeLoc location = getLocation();
|
||||
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
|
||||
// TODO: How can you determine here whether the current pileup has been downsampled?
|
||||
boolean hasBeenSampled = false;
|
||||
|
||||
for (final String sample : samples) {
|
||||
final Iterator<SAMRecordAlignmentState> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
|
||||
int size = 0; // number of elements in this sample's pileup
|
||||
int nDeletions = 0; // number of deletions in this sample's pileup
|
||||
int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordAlignmentState state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
|
||||
final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element
|
||||
final boolean isSingleElementCigar = nextElement == lastElement;
|
||||
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
|
||||
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
|
||||
int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
|
||||
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
|
||||
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
|
||||
final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
|
||||
final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION && !isSingleElementCigar;
|
||||
final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
|
||||
|
||||
int nextElementLength = nextElement.getLength();
|
||||
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (op == CigarOperator.D) {
|
||||
// TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix
|
||||
if (includeReadsWithDeletionAtLoci) { // only add deletions to the pileup if we are authorized to do so
|
||||
pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
|
||||
size++;
|
||||
nDeletions++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!filterBaseInRead(read, location.getStart())) {
|
||||
String insertedBaseString = null;
|
||||
if (nextOp == CigarOperator.I) {
|
||||
final int insertionOffset = isSingleElementCigar ? 0 : 1;
|
||||
// TODO -- someone please implement a better fix for the single element insertion CIGAR!
|
||||
if (isSingleElementCigar)
|
||||
readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
|
||||
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
|
||||
}
|
||||
|
||||
pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
|
||||
size++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
|
||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
|
||||
}
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateReadStates() {
|
||||
for (final String sample : samples) {
|
||||
Iterator<SAMRecordAlignmentState> it = readStates.iterator(sample);
|
||||
while (it.hasNext()) {
|
||||
SAMRecordAlignmentState state = it.next();
|
||||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
it.remove(); // we've stepped off the end of the object
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// getting the list of reads
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Transfer current list of all unique reads that have ever been used in any pileup, clearing old list
|
||||
*
|
||||
* This list is guaranteed to only contain unique reads, even across calls to the this function. It is
|
||||
* literally the unique set of reads ever seen.
|
||||
*
|
||||
* The list occurs in the same order as they are encountered in the underlying iterator.
|
||||
*
|
||||
* Takes the maintained list of submitted reads, and transfers it to the caller of this
|
||||
* function. The old list of set to a new, cleanly allocated list so the caller officially
|
||||
* owns the list returned by this call. This is the only way to clear the tracking
|
||||
* of submitted reads, if enabled.
|
||||
*
|
||||
* The purpose of this function is allow users of LIBS to keep track of all of the reads pulled off the
|
||||
* underlying SAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for
|
||||
* any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads
|
||||
* used across all pileups. This is necessary for LIBS to handle because attempting to do
|
||||
* so from the pileups coming out of LIBS is extremely expensive.
|
||||
*
|
||||
* This functionality is only available if LIBS was created with the argument to track the reads
|
||||
*
|
||||
* @throws UnsupportedOperationException if called when keepingSubmittedReads is false
|
||||
*
|
||||
* @return the current list
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public List<SAMRecord> transferReadsFromAllPreviousPileups() {
|
||||
return readStates.transferSubmittedReads();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the underlying list of tracked reads. For testing only
|
||||
* @return a non-null list
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected List<SAMRecord> getReadsFromAllPreviousPileups() {
|
||||
return readStates.getSubmittedReads();
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// utility functions
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Generic place to put per-base filters appropriate to LocusIteratorByState
|
||||
*
|
||||
* @param rec
|
||||
* @param pos
|
||||
* @return
|
||||
*/
|
||||
private boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
|
||||
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a LIBSDownsamplingInfo object from the requested info in ReadProperties
|
||||
*
|
||||
* LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
|
||||
* downsampling to coverage by sample. SAMDataSource will have refrained from applying
|
||||
* any downsamplers to the read stream in this case, in the expectation that LIBS will
|
||||
* manage the downsampling. The reason for this is twofold: performance (don't have to
|
||||
* split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
|
||||
* of reads (eg., using half of a read, and throwing the rest away).
|
||||
*
|
||||
* @param readInfo GATK engine information about what should be done to the reads
|
||||
* @return a LIBS specific info holder about downsampling only
|
||||
*/
|
||||
private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) {
|
||||
final boolean performDownsampling = readInfo.getDownsamplingMethod() != null &&
|
||||
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readInfo.getDownsamplingMethod().toCoverage != null;
|
||||
final int coverage = performDownsampling ? readInfo.getDownsamplingMethod().toCoverage : 0;
|
||||
|
||||
return new LIBSDownsamplingInfo(performDownsampling, coverage);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,351 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator.old;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Manages and updates mapping from sample -> List of SAMRecordAlignmentState
|
||||
*
|
||||
* Optionally can keep track of all of the reads pulled off the iterator and
|
||||
* that appeared at any point in the list of SAMRecordAlignmentState for any reads.
|
||||
* This functionaly is only possible at this stage, as this object does the popping of
|
||||
* reads off the underlying source iterator, and presents only a pileup-like interface
|
||||
* of samples -> SAMRecordAlignmentStates. Reconstructing the unique set of reads
|
||||
* used across all pileups is extremely expensive from that data structure.
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/5/13
|
||||
* Time: 2:02 PM
|
||||
*/
|
||||
class ReadStateManager {
|
||||
private final List<String> samples;
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
|
||||
private LinkedList<SAMRecord> submittedReads;
|
||||
private final boolean keepSubmittedReads;
|
||||
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(final Iterator<SAMRecord> source,
|
||||
final List<String> samples,
|
||||
final LIBSDownsamplingInfo LIBSDownsamplingInfo,
|
||||
final boolean keepSubmittedReads) {
|
||||
this.samples = samples;
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
|
||||
this.keepSubmittedReads = keepSubmittedReads;
|
||||
this.submittedReads = new LinkedList<SAMRecord>();
|
||||
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager(LIBSDownsamplingInfo));
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(LIBSDownsamplingInfo, samples);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented
|
||||
* for this iterator; if present, total read states will be decremented.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
public Iterator<SAMRecordAlignmentState> iterator(final String sample) {
|
||||
return new Iterator<SAMRecordAlignmentState>() {
|
||||
private Iterator<SAMRecordAlignmentState> wrappedIterator = readStatesBySample.get(sample).iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return totalReadStates == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager across all samples.
|
||||
*
|
||||
* @return Total number of reads over all samples.
|
||||
*/
|
||||
public int size() {
|
||||
return totalReadStates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager in the given sample.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
public int size(final String sample) {
|
||||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState getFirst() {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if (!reads.isEmpty())
|
||||
return reads.peek();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return totalReadStates > 0 || iterator.hasNext();
|
||||
}
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
if (isEmpty())
|
||||
return false;
|
||||
else {
|
||||
SAMRecordAlignmentState state = getFirst();
|
||||
SAMRecord ourRead = state.getRead();
|
||||
return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
|
||||
}
|
||||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
if (!iterator.hasNext())
|
||||
return;
|
||||
|
||||
// the next record in the stream, peeked as to not remove it from the stream
|
||||
if ( isEmpty() ) {
|
||||
final int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
final int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
submitRead(iterator.next());
|
||||
}
|
||||
} else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if (readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
|
||||
samplePartitioner.doneSubmittingReads();
|
||||
|
||||
for (final String sample : samples) {
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
||||
samplePartitioner.reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a read to the sample partitioner, potentially adding it to all submitted reads, if appropriate
|
||||
* @param read a non-null read
|
||||
*/
|
||||
@Requires("read != null")
|
||||
protected void submitRead(final SAMRecord read) {
|
||||
if ( keepSubmittedReads )
|
||||
submittedReads.add(read);
|
||||
samplePartitioner.submitRead(read);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transfer current list of submitted reads, clearing old list
|
||||
*
|
||||
* Takes the maintained list of submitted reads, and transfers it to the caller of this
|
||||
* function. The old list of set to a new, cleanly allocated list so the caller officially
|
||||
* owns the list returned by this call. This is the only way to clear the tracking
|
||||
* of submitted reads, if enabled.
|
||||
*
|
||||
* How to use this function:
|
||||
*
|
||||
* while ( doing some work unit, such as creating pileup at some locus ):
|
||||
* interact with ReadStateManager in some way to make work unit
|
||||
* readsUsedInPileup = transferSubmittedReads)
|
||||
*
|
||||
* @throws UnsupportedOperationException if called when keepSubmittedReads is false
|
||||
*
|
||||
* @return the current list of submitted reads
|
||||
*/
|
||||
@Ensures({
|
||||
"result != null",
|
||||
"result != submittedReads" // result and previous submitted reads are not == objects
|
||||
})
|
||||
public List<SAMRecord> transferSubmittedReads() {
|
||||
if ( ! keepSubmittedReads ) throw new UnsupportedOperationException("cannot transferSubmittedReads if you aren't keeping them");
|
||||
|
||||
final List<SAMRecord> prevSubmittedReads = submittedReads;
|
||||
this.submittedReads = new LinkedList<SAMRecord>();
|
||||
|
||||
return prevSubmittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we keeping submitted reads, or not?
|
||||
* @return true if we are keeping them, false otherwise
|
||||
*/
|
||||
public boolean isKeepingSubmittedReads() {
|
||||
return keepSubmittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a pointer to the list of submitted reads.
|
||||
*
|
||||
* This is not a copy of the list; it is shared with this ReadStateManager. It should
|
||||
* not be modified. Updates to this ReadStateManager may change the contains of the
|
||||
* list entirely.
|
||||
*
|
||||
* For testing purposes only.
|
||||
*
|
||||
* Will always be empty if we are are not keepSubmittedReads
|
||||
*
|
||||
* @return a non-null list of reads that have been submitted to this ReadStateManager
|
||||
*/
|
||||
@Ensures({"result != null","keepSubmittedReads || result.isEmpty()"})
|
||||
protected List<SAMRecord> getSubmittedReads() {
|
||||
return submittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add reads with the given sample name to the given hanger entry.
|
||||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordAlignmentState> newReadStates = new LinkedList<SAMRecordAlignmentState>();
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
SAMRecordAlignmentState state = new SAMRecordAlignmentState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
protected class PerSampleReadStateManager implements Iterable<SAMRecordAlignmentState> {
|
||||
private List<LinkedList<SAMRecordAlignmentState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordAlignmentState>>();
|
||||
private final Downsampler<LinkedList<SAMRecordAlignmentState>> levelingDownsampler;
|
||||
|
||||
private int thisSampleReadStates = 0;
|
||||
|
||||
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new LevelingDownsampler<LinkedList<SAMRecordAlignmentState>, SAMRecordAlignmentState>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: null;
|
||||
}
|
||||
|
||||
public void addStatesAtNextAlignmentStart(Collection<SAMRecordAlignmentState> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(new LinkedList<SAMRecordAlignmentState>(states));
|
||||
thisSampleReadStates += states.size();
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( levelingDownsampler != null ) {
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordAlignmentState> iterator() {
|
||||
return new Iterator<SAMRecordAlignmentState>() {
|
||||
private Iterator<LinkedList<SAMRecordAlignmentState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<SAMRecordAlignmentState> currentPositionReadStates = null;
|
||||
private Iterator<SAMRecordAlignmentState> currentPositionReadStatesIterator = null;
|
||||
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,205 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator.old;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
/**
|
||||
* Steps a single read along its alignment to the genome
|
||||
*
|
||||
* The logical model for generating extended events is as follows: the "record state"
|
||||
* implements the traversal along the reference; thus stepForwardOnGenome() returns
|
||||
* on every and only on actual reference bases. This can be a (mis)match or a deletion
|
||||
* (in the latter case, we still return on every individual reference base the deletion spans).
|
||||
* In the extended events mode, the record state also remembers if there was an insertion, or
|
||||
* if the deletion just started *right before* the current reference base the record state is
|
||||
* pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome()
|
||||
* will clear that memory (as we remember only extended events immediately preceding
|
||||
* the current reference base).
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/5/13
|
||||
* Time: 1:08 PM
|
||||
*/
|
||||
public class SAMRecordAlignmentState {
|
||||
// TODO -- one idea to clean up this functionality:
|
||||
// TODO --
|
||||
// TODO -- split functionality here into an alignment state machine and an
|
||||
// TODO -- alignment state. The alignment state simply carries with it the
|
||||
// TODO -- state of the alignment (the current cigar op, the genome offset,
|
||||
// TODO -- the read offset, etc. The AlignmentStateMachine produces these
|
||||
// TODO -- states, and has operations such stepForwardOnGenome, getLastState(),
|
||||
// TODO -- getCurrentState(), getNextState();
|
||||
|
||||
/**
|
||||
* Our read
|
||||
*/
|
||||
private final SAMRecord read;
|
||||
private final Cigar cigar;
|
||||
private final int nCigarElements;
|
||||
|
||||
/**
|
||||
* how far are we offset from the start of the read bases?
|
||||
*/
|
||||
int readOffset = -1;
|
||||
|
||||
/**
|
||||
* how far are we offset from the alignment start on the genome?
|
||||
*/
|
||||
int genomeOffset = -1;
|
||||
|
||||
int cigarOffset = -1;
|
||||
CigarElement curElement = null;
|
||||
|
||||
/**
|
||||
* how far are we into a single cigarElement?
|
||||
*/
|
||||
int cigarElementCounter = -1;
|
||||
|
||||
@Requires("read != null")
|
||||
// TODO -- should enforce contracts like the read is aligned, etc
|
||||
public SAMRecordAlignmentState(final SAMRecord read) {
|
||||
this.read = read;
|
||||
this.cigar = read.getCigar();
|
||||
this.nCigarElements = cigar.numCigarElements();
|
||||
}
|
||||
|
||||
public SAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is our current offset in the read's bases that aligns us with the reference genome?
|
||||
*
|
||||
* @return the current read offset position
|
||||
*/
|
||||
public int getReadOffset() {
|
||||
return readOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
|
||||
*
|
||||
* @return the current offset
|
||||
*/
|
||||
public int getGenomeOffset() {
|
||||
return genomeOffset;
|
||||
}
|
||||
|
||||
public int getGenomePosition() {
|
||||
return read.getAlignmentStart() + getGenomeOffset();
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
|
||||
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||
}
|
||||
|
||||
public CigarOperator getCurrentCigarOperator() {
|
||||
return curElement.getOperator();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||
}
|
||||
|
||||
public CigarElement peekForwardOnGenome() {
|
||||
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
|
||||
}
|
||||
|
||||
public CigarElement peekBackwardOnGenome() {
|
||||
return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
|
||||
}
|
||||
|
||||
public CigarOperator stepForwardOnGenome() {
|
||||
// we enter this method with readOffset = index of the last processed base on the read
|
||||
// (-1 if we did not process a single base yet); this can be last matching base,
|
||||
// or last base of an insertion
|
||||
if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
|
||||
cigarOffset++;
|
||||
if (cigarOffset < nCigarElements) {
|
||||
curElement = cigar.getCigarElement(cigarOffset);
|
||||
cigarElementCounter = 0;
|
||||
// next line: guards against cigar elements of length 0; when new cigar element is retrieved,
|
||||
// we reenter in order to re-check cigarElementCounter against curElement's length
|
||||
return stepForwardOnGenome();
|
||||
} else {
|
||||
if (curElement != null && curElement.getOperator() == CigarOperator.D)
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
|
||||
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
||||
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
||||
// current offset of this read is the next ref base after the end of the indel. This position will
|
||||
// model a point on the reference somewhere after the end of the read.
|
||||
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
boolean done = false;
|
||||
switch (curElement.getOperator()) {
|
||||
case H: // ignore hard clips
|
||||
case P: // ignore pads
|
||||
cigarElementCounter = curElement.getLength();
|
||||
break;
|
||||
case I: // insertion w.r.t. the reference
|
||||
case S: // soft clip
|
||||
cigarElementCounter = curElement.getLength();
|
||||
readOffset += curElement.getLength();
|
||||
break;
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
// should be the same as N case
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
readOffset++;
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
return done ? curElement.getOperator() : stepForwardOnGenome();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator.old;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.PassThroughDownsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
|
||||
*
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
class SamplePartitioner {
|
||||
private Map<String, Downsampler<SAMRecord>> readsBySample;
|
||||
|
||||
public SamplePartitioner(final LIBSDownsamplingInfo LIBSDownsamplingInfo, final List<String> samples) {
|
||||
readsBySample = new HashMap<String, Downsampler<SAMRecord>>(samples.size());
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample, createDownsampler(LIBSDownsamplingInfo));
|
||||
}
|
||||
}
|
||||
|
||||
private Downsampler<SAMRecord> createDownsampler(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
return LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new ReservoirDownsampler<SAMRecord>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: new PassThroughDownsampler<SAMRecord>();
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submit(read);
|
||||
}
|
||||
|
||||
public void doneSubmittingReads() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().signalEndOfInput();
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
|
||||
return readsBySample.get(sampleName).consumeFinalizedItems();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().clear();
|
||||
perSampleReads.getValue().reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -67,32 +67,32 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
@Param
|
||||
private Downsampling downsampling;
|
||||
|
||||
public void timeDownsampling(int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
|
||||
reader.getFileHeader(),
|
||||
SAMFileHeader.SortOrder.coordinate,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
downsampling.create(),
|
||||
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte)0,
|
||||
false);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
while(locusIteratorByState.hasNext()) {
|
||||
locusIteratorByState.next().getLocation();
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
// public void timeDownsampling(int reps) {
|
||||
// for(int i = 0; i < reps; i++) {
|
||||
// SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
// ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
|
||||
// reader.getFileHeader(),
|
||||
// SAMFileHeader.SortOrder.coordinate,
|
||||
// false,
|
||||
// SAMFileReader.ValidationStringency.SILENT,
|
||||
// downsampling.create(),
|
||||
// new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||
// Collections.<ReadFilter>emptyList(),
|
||||
// Collections.<ReadTransformer>emptyList(),
|
||||
// false,
|
||||
// (byte)0,
|
||||
// false);
|
||||
//
|
||||
// GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
// // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
// Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
// LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
// while(locusIteratorByState.hasNext()) {
|
||||
// locusIteratorByState.next().getLocation();
|
||||
// }
|
||||
// reader.close();
|
||||
// }
|
||||
// }
|
||||
|
||||
private enum Downsampling {
|
||||
NONE {
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -78,24 +77,24 @@ public class AlignmentStateMachinePerformance {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case OLD_STATE:
|
||||
{
|
||||
final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
|
||||
while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
alignmentStateMachine.getRead();
|
||||
nIterations++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
// case OLD_STATE:
|
||||
// {
|
||||
// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
|
||||
// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
// alignmentStateMachine.getRead();
|
||||
// nIterations++;
|
||||
// }
|
||||
// }
|
||||
// break;
|
||||
case NEW_LIBS:
|
||||
{
|
||||
final List<SAMRecord> reads = Collections.nCopies(30, (SAMRecord)read);
|
||||
final List<SAMRecord> reads = Collections.nCopies(30, (SAMRecord) read);
|
||||
final org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState libs =
|
||||
new org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState(
|
||||
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
genomeLocParser,
|
||||
LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups());
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
||||
while ( libs.hasNext() ) {
|
||||
AlignmentContext context = libs.next();
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -71,14 +70,29 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
}
|
||||
}
|
||||
|
||||
public void timeOriginalLIBS(int rep) {
|
||||
// public void timeOriginalLIBS(int rep) {
|
||||
// for ( int i = 0; i < rep; i++ ) {
|
||||
// final org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState libs =
|
||||
// new org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState(
|
||||
// new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
// LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
// genomeLocParser,
|
||||
// LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
//
|
||||
// while ( libs.hasNext() ) {
|
||||
// AlignmentContext context = libs.next();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
public void timeLegacyLIBS(int rep) {
|
||||
for ( int i = 0; i < rep; i++ ) {
|
||||
final org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState libs =
|
||||
new org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState(
|
||||
final org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState libs =
|
||||
new org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState(
|
||||
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
genomeLocParser,
|
||||
LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups());
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
||||
while ( libs.hasNext() ) {
|
||||
AlignmentContext context = libs.next();
|
||||
|
|
@ -93,7 +107,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
genomeLocParser,
|
||||
LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups());
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
||||
while ( libs.hasNext() ) {
|
||||
AlignmentContext context = libs.next();
|
||||
|
|
@ -101,16 +115,16 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
}
|
||||
}
|
||||
|
||||
public void timeOriginalLIBSStateMachine(int rep) {
|
||||
for ( int i = 0; i < rep; i++ ) {
|
||||
for ( final SAMRecord read : reads ) {
|
||||
final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
|
||||
while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
alignmentStateMachine.getGenomeOffset();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// public void timeOriginalLIBSStateMachine(int rep) {
|
||||
// for ( int i = 0; i < rep; i++ ) {
|
||||
// for ( final SAMRecord read : reads ) {
|
||||
// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
|
||||
// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
// alignmentStateMachine.getGenomeOffset();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
public void timeAlignmentStateMachine(int rep) {
|
||||
for ( int i = 0; i < rep; i++ ) {
|
||||
|
|
|
|||
|
|
@ -57,22 +57,12 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
public static List<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
|
||||
protected LocusIteratorByState makeLTBS(List<SAMRecord> reads,
|
||||
ReadProperties readAttributes) {
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
readAttributes,
|
||||
genomeLocParser,
|
||||
sampleListForSAMWithoutReadGroups());
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
public static ReadProperties createTestReadProperties() {
|
||||
|
|
|
|||
|
|
@ -27,9 +27,6 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
|
||||
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -65,7 +62,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
|||
}
|
||||
|
||||
public void run() {
|
||||
final List<String> samples = sampleListForSAMWithoutReadGroups();
|
||||
final List<String> samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups();
|
||||
final Iterator<SAMRecord> iterator = new LinkedList<SAMRecord>().iterator();
|
||||
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false);
|
||||
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
|
|
|
|||
|
|
@ -1,463 +0,0 @@
|
|||
//package org.broadinstitute.sting.utils.locusiterator.old;
|
||||
//
|
||||
//import net.sf.samtools.*;
|
||||
//import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
//import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
//import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
//import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
//import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
//import org.broadinstitute.sting.utils.Utils;
|
||||
//import org.broadinstitute.sting.utils.locusiterator.LIBS_position;
|
||||
//import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
|
||||
//import org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState;
|
||||
//import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
//import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
//import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
//import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
//import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
//import org.testng.Assert;
|
||||
//import org.testng.annotations.DataProvider;
|
||||
//import org.testng.annotations.Test;
|
||||
//
|
||||
//import java.util.*;
|
||||
//
|
||||
///**
|
||||
// * testing of the new (non-legacy) version of LocusIteratorByState
|
||||
// */
|
||||
//public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||
//
|
||||
// // TODO -- REMOVE ME WHEN LIBS IS FIXED
|
||||
// // TODO -- CURRENT CODE DOESN'T CORRECTLY COMPUTE THINGS LIKE BEFORE DELETION, AFTER INSERTION, ETC
|
||||
// private final static boolean ALLOW_BROKEN_LIBS_STATE = true;
|
||||
//
|
||||
// protected LocusIteratorByState li;
|
||||
//
|
||||
// @Test
|
||||
// public void testXandEQOperators() {
|
||||
// final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
// final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
||||
//
|
||||
// // create a test version of the Reads object
|
||||
// ReadProperties readAttributes = createTestReadProperties();
|
||||
//
|
||||
// SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||
// r1.setReadBases(bases1);
|
||||
// r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
// r1.setCigarString("10M");
|
||||
//
|
||||
// SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||
// r2.setReadBases(bases2);
|
||||
// r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
// r2.setCigarString("3=1X5=1X");
|
||||
//
|
||||
// SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||
// r3.setReadBases(bases2);
|
||||
// r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
// r3.setCigarString("3=1X5M1X");
|
||||
//
|
||||
// SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||
// r4.setReadBases(bases2);
|
||||
// r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
// r4.setCigarString("10M");
|
||||
//
|
||||
// List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// li = makeLTBS(reads,readAttributes);
|
||||
//
|
||||
// while (li.hasNext()) {
|
||||
// AlignmentContext context = li.next();
|
||||
// ReadBackedPileup pileup = context.getBasePileup();
|
||||
// Assert.assertEquals(pileup.depthOfCoverage(), 4);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testIndelsInRegularPileup() {
|
||||
// final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
// final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
|
||||
//
|
||||
// // create a test version of the Reads object
|
||||
// ReadProperties readAttributes = createTestReadProperties();
|
||||
//
|
||||
// SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
|
||||
// before.setReadBases(bases);
|
||||
// before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
// before.setCigarString("10M");
|
||||
//
|
||||
// SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
|
||||
// during.setReadBases(indelBases);
|
||||
// during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
// during.setCigarString("4M2I6M");
|
||||
//
|
||||
// SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
|
||||
// after.setReadBases(bases);
|
||||
// after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
// after.setCigarString("10M");
|
||||
//
|
||||
// List<SAMRecord> reads = Arrays.asList(before, during, after);
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// li = makeLTBS(reads,readAttributes);
|
||||
//
|
||||
// boolean foundIndel = false;
|
||||
// while (li.hasNext()) {
|
||||
// AlignmentContext context = li.next();
|
||||
// ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
|
||||
// for (PileupElement p : pileup) {
|
||||
// if (p.isBeforeInsertion()) {
|
||||
// foundIndel = true;
|
||||
// Assert.assertEquals(p.getLengthOfImmediatelyFollowingIndel(), 2, "Wrong event length");
|
||||
// Assert.assertEquals(p.getBasesOfImmediatelyFollowingInsertion(), "CT", "Inserted bases are incorrect");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// }
|
||||
//
|
||||
// Assert.assertTrue(foundIndel,"Indel in pileup not found");
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testWholeIndelReadInIsolation() {
|
||||
// final int firstLocus = 44367789;
|
||||
//
|
||||
// // create a test version of the Reads object
|
||||
// ReadProperties readAttributes = createTestReadProperties();
|
||||
//
|
||||
// SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
|
||||
// indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
// indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
|
||||
// indelOnlyRead.setCigarString("76I");
|
||||
//
|
||||
// List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// li = makeLTBS(reads, readAttributes);
|
||||
//
|
||||
// // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
|
||||
// // and considers it to be an indel-containing read.
|
||||
// Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
|
||||
// AlignmentContext alignmentContext = li.next();
|
||||
// Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
|
||||
// ReadBackedPileup basePileup = alignmentContext.getBasePileup();
|
||||
// Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
|
||||
// Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Test to make sure that reads supporting only an indel (example cigar string: 76I) do
|
||||
// * not negatively influence the ordering of the pileup.
|
||||
// */
|
||||
// @Test
|
||||
// public void testWholeIndelRead() {
|
||||
// final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
//
|
||||
// SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
|
||||
// leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
// leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
// leadingRead.setCigarString("1M75I");
|
||||
//
|
||||
// SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
|
||||
// indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
|
||||
// indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
// indelOnlyRead.setCigarString("76I");
|
||||
//
|
||||
// SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
|
||||
// fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
// fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
// fullMatchAfterIndel.setCigarString("75I1M");
|
||||
//
|
||||
// List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// li = makeLTBS(reads, createTestReadProperties());
|
||||
// int currentLocus = firstLocus;
|
||||
// int numAlignmentContextsFound = 0;
|
||||
//
|
||||
// while(li.hasNext()) {
|
||||
// AlignmentContext alignmentContext = li.next();
|
||||
// Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
|
||||
//
|
||||
// if(currentLocus == firstLocus) {
|
||||
// List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
// Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
|
||||
// Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
|
||||
// }
|
||||
// else if(currentLocus == secondLocus) {
|
||||
// List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
// Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
|
||||
// Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
|
||||
// Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
|
||||
// }
|
||||
//
|
||||
// currentLocus++;
|
||||
// numAlignmentContextsFound++;
|
||||
// }
|
||||
//
|
||||
// Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
|
||||
// */
|
||||
// @Test
|
||||
// public void testWholeIndelReadRepresentedTest() {
|
||||
// final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
//
|
||||
// SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
|
||||
// read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
|
||||
// read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
|
||||
// read1.setCigarString("1I");
|
||||
//
|
||||
// List<SAMRecord> reads = Arrays.asList(read1);
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// li = makeLTBS(reads, createTestReadProperties());
|
||||
//
|
||||
// while(li.hasNext()) {
|
||||
// AlignmentContext alignmentContext = li.next();
|
||||
// ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
// Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
// PileupElement pe = p.iterator().next();
|
||||
// Assert.assertTrue(pe.isBeforeInsertion());
|
||||
// Assert.assertFalse(pe.isAfterInsertion());
|
||||
// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A");
|
||||
// }
|
||||
//
|
||||
// SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
|
||||
// read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
|
||||
// read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
|
||||
// read2.setCigarString("10I");
|
||||
//
|
||||
// reads = Arrays.asList(read2);
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// li = makeLTBS(reads, createTestReadProperties());
|
||||
//
|
||||
// while(li.hasNext()) {
|
||||
// AlignmentContext alignmentContext = li.next();
|
||||
// ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
// Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
// PileupElement pe = p.iterator().next();
|
||||
// Assert.assertTrue(pe.isBeforeInsertion());
|
||||
// Assert.assertFalse(pe.isAfterInsertion());
|
||||
// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "AAAAAAAAAA");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// ////////////////////////////////////////////
|
||||
// // comprehensive LIBS/PileupElement tests //
|
||||
// ////////////////////////////////////////////
|
||||
//
|
||||
// @DataProvider(name = "LIBSTest")
|
||||
// public Object[][] makeLIBSTest() {
|
||||
// final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
//
|
||||
// tests.add(new Object[]{new LIBSTest("1I", 1)});
|
||||
// tests.add(new Object[]{new LIBSTest("10I", 10)});
|
||||
// tests.add(new Object[]{new LIBSTest("2M2I2M", 6)});
|
||||
// tests.add(new Object[]{new LIBSTest("2M2I", 4)});
|
||||
// //TODO -- uncomment these when LIBS is fixed
|
||||
// //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
|
||||
// //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
|
||||
// //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
|
||||
// //{new LIBSTest("1M2D2M", 3)},
|
||||
// tests.add(new Object[]{new LIBSTest("1S1M", 2)});
|
||||
// tests.add(new Object[]{new LIBSTest("1M1S", 2)});
|
||||
// tests.add(new Object[]{new LIBSTest("1S1M1I", 3)});
|
||||
//
|
||||
// return tests.toArray(new Object[][]{});
|
||||
//
|
||||
// // TODO -- enable combinatorial tests here when LIBS is fixed
|
||||
//// return createLIBSTests(
|
||||
//// Arrays.asList(1, 10),
|
||||
//// Arrays.asList(1, 2, 3));
|
||||
// }
|
||||
//
|
||||
// @Test(dataProvider = "LIBSTest")
|
||||
// public void testLIBS(LIBSTest params) {
|
||||
// if ( params.getElements() == null || params.getElements().get(0).getOperator() == CigarOperator.I )
|
||||
// // TODO -- ENABLE ME WHEN LIBS IS FIXED
|
||||
// return;
|
||||
//
|
||||
// // create the iterator by state with the fake reads and fake records
|
||||
// final GATKSAMRecord read = params.makeRead();
|
||||
// li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
|
||||
// final LIBS_position tester = new LIBS_position(read);
|
||||
//
|
||||
// int bpVisited = 0;
|
||||
// while ( li.hasNext() ) {
|
||||
// bpVisited++;
|
||||
//
|
||||
// AlignmentContext alignmentContext = li.next();
|
||||
// ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
// Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
// PileupElement pe = p.iterator().next();
|
||||
//
|
||||
// tester.stepForwardOnGenome();
|
||||
//
|
||||
// if ( ! ALLOW_BROKEN_LIBS_STATE ) {
|
||||
// Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase);
|
||||
// Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
|
||||
// Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase);
|
||||
// Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
|
||||
// Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
|
||||
// Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
|
||||
// Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
|
||||
// }
|
||||
//
|
||||
// Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset());
|
||||
// }
|
||||
//
|
||||
// // min is one because always visit something, even for 10I reads
|
||||
// final int expectedBpToVisit = Math.max(read.getAlignmentEnd() - read.getAlignmentStart() + 1, 1);
|
||||
// Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
// }
|
||||
//
|
||||
// // ------------------------------------------------------------
|
||||
// //
|
||||
// // Tests for keeping reads
|
||||
// //
|
||||
// // ------------------------------------------------------------
|
||||
//
|
||||
// @DataProvider(name = "LIBSKeepSubmittedReads")
|
||||
// public Object[][] makeLIBSKeepSubmittedReads() {
|
||||
// final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
//
|
||||
// for ( final boolean doSampling : Arrays.asList(true, false) ) {
|
||||
// for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
|
||||
// for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
|
||||
// for ( final int nSamples : Arrays.asList(1, 2, 10) ) {
|
||||
// for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
|
||||
//// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
|
||||
//// for ( final int nLoci : Arrays.asList(10) ) {
|
||||
//// for ( final int nSamples : Arrays.asList(1) ) {
|
||||
//// for ( final boolean keepReads : Arrays.asList(true) ) {
|
||||
//// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
|
||||
// tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling});
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return tests.toArray(new Object[][]{});
|
||||
// }
|
||||
//
|
||||
// @Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads")
|
||||
// public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
|
||||
// final int nLoci,
|
||||
// final int nSamples,
|
||||
// final boolean keepReads,
|
||||
// final boolean grabReadsAfterEachCycle,
|
||||
// final boolean downsample) {
|
||||
// logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
|
||||
// final int readLength = 10;
|
||||
//
|
||||
// final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
|
||||
// final List<String> samples = new ArrayList<String>(nSamples);
|
||||
// for ( int i = 0; i < nSamples; i++ ) {
|
||||
// final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg" + i);
|
||||
// final String sample = "sample" + i;
|
||||
// samples.add(sample);
|
||||
// rg.setSample(sample);
|
||||
// rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
|
||||
// header.addReadGroup(rg);
|
||||
// }
|
||||
//
|
||||
// final int maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2;
|
||||
// final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1);
|
||||
// final DownsamplingMethod downsampler = downsample
|
||||
// ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false)
|
||||
// : new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
// final List<SAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
// li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
// createTestReadProperties(downsampler, keepReads),
|
||||
// genomeLocParser,
|
||||
// samples);
|
||||
//
|
||||
// final Set<SAMRecord> seenSoFar = new HashSet<SAMRecord>();
|
||||
// final Set<SAMRecord> keptReads = new HashSet<SAMRecord>();
|
||||
// int bpVisited = 0;
|
||||
// while ( li.hasNext() ) {
|
||||
// bpVisited++;
|
||||
// final AlignmentContext alignmentContext = li.next();
|
||||
// final ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
//
|
||||
// if ( downsample ) {
|
||||
// // just not a safe test
|
||||
// //Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
|
||||
// } else {
|
||||
// final int minPileupSize = nReadsPerLocus * nSamples;
|
||||
// Assert.assertTrue(p.getNumberOfElements() >= minPileupSize);
|
||||
// }
|
||||
//
|
||||
// seenSoFar.addAll(p.getReads());
|
||||
// if ( keepReads && grabReadsAfterEachCycle ) {
|
||||
// final List<SAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
||||
//
|
||||
// // the number of reads starting here
|
||||
// int nReadsStartingHere = 0;
|
||||
// for ( final SAMRecord read : p.getReads() )
|
||||
// if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
||||
// nReadsStartingHere++;
|
||||
//
|
||||
// if ( downsample )
|
||||
// // with downsampling we might have some reads here that were downsampled away
|
||||
// // in the pileup
|
||||
// Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
|
||||
// else
|
||||
// Assert.assertEquals(locusReads.size(), nReadsStartingHere);
|
||||
// keptReads.addAll(locusReads);
|
||||
//
|
||||
// // check that all reads we've seen so far are in our keptReads
|
||||
// for ( final SAMRecord read : seenSoFar ) {
|
||||
// Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if ( ! keepReads )
|
||||
// Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty");
|
||||
// }
|
||||
//
|
||||
// if ( keepReads && ! grabReadsAfterEachCycle )
|
||||
// keptReads.addAll(li.transferReadsFromAllPreviousPileups());
|
||||
//
|
||||
// if ( ! downsample ) { // downsampling may drop loci
|
||||
// final int expectedBpToVisit = nLoci + readLength - 1;
|
||||
// Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
// }
|
||||
//
|
||||
// if ( keepReads ) {
|
||||
// // check we have the right number of reads
|
||||
// final int totalReads = nLoci * nReadsPerLocus * nSamples;
|
||||
// if ( ! downsample ) { // downsampling may drop reads
|
||||
// Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal");
|
||||
//
|
||||
// // check that the order of reads is the same as in our read list
|
||||
// for ( int i = 0; i < reads.size(); i++ ) {
|
||||
// final SAMRecord inputRead = reads.get(i);
|
||||
// final SAMRecord keptRead = reads.get(i);
|
||||
// Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i);
|
||||
// }
|
||||
// } else {
|
||||
// Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal");
|
||||
// }
|
||||
//
|
||||
// // check uniqueness
|
||||
// final Set<String> readNames = new HashSet<String>();
|
||||
// for ( final SAMRecord read : keptReads ) {
|
||||
// Assert.assertFalse(readNames.contains(read.getReadName()), "Found duplicate reads in the kept reads");
|
||||
// readNames.add(read.getReadName());
|
||||
// }
|
||||
//
|
||||
// // check that all reads we've seen are in our keptReads
|
||||
// for ( final SAMRecord read : seenSoFar ) {
|
||||
// Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator.old;
|
||||
|
||||
import org.broadinstitute.sting.utils.locusiterator.LIBS_position;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
|
||||
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class SAMRecordAlignmentStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||
@DataProvider(name = "AlignmentStateTest")
|
||||
public Object[][] makeAlignmentStateTest() {
|
||||
// return new Object[][]{{new LIBSTest("1I", 1)}};
|
||||
return createLIBSTests(
|
||||
Arrays.asList(1, 2),
|
||||
Arrays.asList(1, 2, 3, 4));
|
||||
}
|
||||
|
||||
@Test(dataProvider = "AlignmentStateTest")
|
||||
public void testAlignmentStateTest(LIBSTest params) {
|
||||
final GATKSAMRecord read = params.makeRead();
|
||||
final SAMRecordAlignmentState state = new SAMRecordAlignmentState(read);
|
||||
final LIBS_position tester = new LIBS_position(read);
|
||||
|
||||
Assert.assertSame(state.getRead(), read);
|
||||
Assert.assertNotNull(state.toString());
|
||||
|
||||
int bpVisited = 0;
|
||||
int lastOffset = -1;
|
||||
while ( state.stepForwardOnGenome() != null ) {
|
||||
bpVisited++;
|
||||
tester.stepForwardOnGenome();
|
||||
Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset());
|
||||
Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
|
||||
|
||||
// TODO -- state.peekBackwardOnGenome();
|
||||
// TODO -- state.peekForwardOnGenome();
|
||||
// TODO -- state.getCurrentCigarOperator()
|
||||
// TODO -- state.getGenomeOffset();
|
||||
// TODO -- state.getGenomePosition();
|
||||
// TODO -- Assert.assertEquals(state.getLocation(genomeLocParser), EXPECTATION);
|
||||
|
||||
lastOffset = state.getReadOffset();
|
||||
}
|
||||
|
||||
// min is one because always visit something, even for 10I reads
|
||||
final int expectedBpToVisit = read.getAlignmentEnd() - read.getAlignmentStart() + 1;
|
||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue