Delete old LocusIteratorByState, leaving only new LIBS and legacy

This commit is contained in:
Mark DePristo 2013-01-11 10:59:24 -05:00
parent bd03511e35
commit cc0c1b752a
13 changed files with 81 additions and 1578 deletions

View File

@ -113,6 +113,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
libs = ! sourceInfo.getDownsamplingMethod().useLegacyDownsampler ? new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames) : null;
this.sourceIterator = sourceInfo.getDownsamplingMethod().useLegacyDownsampler
// TODO -- remove me when we collapse legacy engine fork
? new PeekableIterator<AlignmentContext>(new LegacyLocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames))
: new PeekableIterator<AlignmentContext>(libs);
@ -120,7 +121,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
}
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() {

View File

@ -403,4 +403,14 @@ public class LocusIteratorByState extends LocusIterator {
throw new IllegalStateException("Tried to create a pileup for read " + read + " with offset " + offset +
" but we never saw such an offset in the alignment state machine");
}
/**
 * For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
 * for the system.
 *
 * @return a fresh, mutable list containing a single null sample name
 */
public static List<String> sampleListForSAMWithoutReadGroups() {
    // A single null entry stands in for "reads without a read group".
    final List<String> dummySamples = new ArrayList<String>(1);
    dummySamples.add(null);
    return dummySamples;
}
}

View File

@ -1,326 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.locusiterator.old;
import com.google.java.contract.Ensures;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.downsampling.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
import org.broadinstitute.sting.utils.locusiterator.LocusIterator;
import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
/**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
*/
public class LocusIteratorByState extends LocusIterator {
    /**
     * our log, which we want to capture anything from this class
     */
    // Fixed: logger was registered under LegacyLocusIteratorByState.class (copy/paste error),
    // so log output from this class was misattributed to the legacy implementation.
    private static final Logger logger = Logger.getLogger(LocusIteratorByState.class);

    // -----------------------------------------------------------------------------------------------------------------
    //
    // member fields
    //
    // -----------------------------------------------------------------------------------------------------------------

    /**
     * Used to create new GenomeLocs.
     */
    private final GenomeLocParser genomeLocParser;
    private final ArrayList<String> samples;
    private final ReadStateManager readStates;
    private final boolean includeReadsWithDeletionAtLoci;

    /** Computed lazily by lazyLoadNextAlignmentContext(); null means "not yet computed" or "exhausted". */
    private AlignmentContext nextAlignmentContext;

    // -----------------------------------------------------------------------------------------------------------------
    //
    // constructors and other basic operations
    //
    // -----------------------------------------------------------------------------------------------------------------

    /**
     * Create a LocusIteratorByState from engine-level read properties.
     *
     * @param samIterator     the source of SAMRecords, ordered by alignment position
     * @param readInformation engine information about downsampling, deletion handling, etc.
     * @param genomeLocParser used to create new GenomeLocs
     * @param samples         the samples to build per-sample pileups for
     */
    public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
                                final ReadProperties readInformation,
                                final GenomeLocParser genomeLocParser,
                                final Collection<String> samples) {
        this(samIterator,
                toDownsamplingInfo(readInformation),
                readInformation.includeReadsWithDeletionAtLoci(),
                genomeLocParser,
                samples,
                readInformation.keepUniqueReadListInLIBS());
    }

    /**
     * Fully-specified constructor, exposed for testing.
     *
     * @param samIterator                    the source of SAMRecords
     * @param downsamplingInfo               LIBS-specific downsampling configuration
     * @param includeReadsWithDeletionAtLoci if true, deletion-spanning reads are added to pileups
     * @param genomeLocParser                used to create new GenomeLocs
     * @param samples                        the samples to build pileups for; may be empty only if samIterator is empty
     * @param maintainUniqueReadsList        if true, track every read ever submitted (see transferReadsFromAllPreviousPileups)
     */
    protected LocusIteratorByState(final Iterator<SAMRecord> samIterator,
                                   final LIBSDownsamplingInfo downsamplingInfo,
                                   final boolean includeReadsWithDeletionAtLoci,
                                   final GenomeLocParser genomeLocParser,
                                   final Collection<String> samples,
                                   final boolean maintainUniqueReadsList ) {
        this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
        this.genomeLocParser = genomeLocParser;
        this.samples = new ArrayList<String>(samples);
        this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList);

        // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
        // there's no read data. So we need to throw this error only when samIterator.hasNext() is true
        if (this.samples.isEmpty() && samIterator.hasNext()) {
            throw new IllegalArgumentException("samples list must not be empty");
        }
    }

    @Override
    public Iterator<AlignmentContext> iterator() {
        return this;
    }

    @Override
    public void close() {
        // nothing to close here; the underlying read iterator is owned by the caller
    }

    @Override
    public boolean hasNext() {
        lazyLoadNextAlignmentContext();
        return nextAlignmentContext != null;
    }

    /** @return the genomic location of the first tracked read state, or null if none are tracked */
    private GenomeLoc getLocation() {
        return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
    }

    // -----------------------------------------------------------------------------------------------------------------
    //
    // next() routine and associated collection operations
    //
    // -----------------------------------------------------------------------------------------------------------------

    @Override
    public AlignmentContext next() {
        lazyLoadNextAlignmentContext();
        if (!hasNext())
            throw new NoSuchElementException("LocusIteratorByState: out of elements.");
        // hand the cached context to the caller and clear it so the next call advances
        AlignmentContext currentAlignmentContext = nextAlignmentContext;
        nextAlignmentContext = null;
        return currentAlignmentContext;
    }

    /**
     * Creates the next alignment context from the given state. Note that this is implemented as a lazy load method.
     * nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
     */
    private void lazyLoadNextAlignmentContext() {
        while (nextAlignmentContext == null && readStates.hasNext()) {
            readStates.collectPendingReads();

            final GenomeLoc location = getLocation();
            final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
            // TODO: How can you determine here whether the current pileup has been downsampled?
            boolean hasBeenSampled = false;

            for (final String sample : samples) {
                final Iterator<SAMRecordAlignmentState> iterator = readStates.iterator(sample);
                final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));

                int size = 0;                               // number of elements in this sample's pileup
                int nDeletions = 0;                         // number of deletions in this sample's pileup
                int nMQ0Reads = 0;                          // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)

                while (iterator.hasNext()) {
                    final SAMRecordAlignmentState state = iterator.next();            // state object with the read/offset information
                    final GATKSAMRecord read = (GATKSAMRecord) state.getRead();       // the actual read
                    final CigarOperator op = state.getCurrentCigarOperator();         // current cigar operator
                    final CigarElement nextElement = state.peekForwardOnGenome();     // next cigar element
                    final CigarElement lastElement = state.peekBackwardOnGenome();    // last cigar element
                    final boolean isSingleElementCigar = nextElement == lastElement;
                    final CigarOperator nextOp = nextElement.getOperator();           // next cigar operator
                    final CigarOperator lastOp = lastElement.getOperator();           // last cigar operator
                    int readOffset = state.getReadOffset();                           // the base offset on this read

                    final boolean isBeforeDeletion  = nextOp == CigarOperator.DELETION;
                    final boolean isAfterDeletion   = lastOp == CigarOperator.DELETION;
                    final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
                    final boolean isAfterInsertion  = lastOp == CigarOperator.INSERTION && !isSingleElementCigar;
                    final boolean isNextToSoftClip  = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());

                    int nextElementLength = nextElement.getLength();

                    if (op == CigarOperator.N) // N's are never added to any pileup
                        continue;

                    if (op == CigarOperator.D) {
                        // TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix
                        if (includeReadsWithDeletionAtLoci) { // only add deletions to the pileup if we are authorized to do so
                            pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
                            size++;
                            nDeletions++;
                            if (read.getMappingQuality() == 0)
                                nMQ0Reads++;
                        }
                    }
                    else {
                        if (!filterBaseInRead(read, location.getStart())) {
                            String insertedBaseString = null;
                            if (nextOp == CigarOperator.I) {
                                final int insertionOffset = isSingleElementCigar ? 0 : 1;
                                // TODO -- someone please implement a better fix for the single element insertion CIGAR!
                                if (isSingleElementCigar)
                                    readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
                                insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
                            }

                            pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
                            size++;
                            if (read.getMappingQuality() == 0)
                                nMQ0Reads++;
                        }
                    }
                }

                if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
                    fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
            }

            updateReadStates(); // critical - must be called after we get the current state offsets and location

            if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
                nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
        }
    }

    /** Step every tracked read state forward on the genome, discarding states whose reads are exhausted. */
    private void updateReadStates() {
        for (final String sample : samples) {
            Iterator<SAMRecordAlignmentState> it = readStates.iterator(sample);
            while (it.hasNext()) {
                SAMRecordAlignmentState state = it.next();
                CigarOperator op = state.stepForwardOnGenome();
                if (op == null) {
                    // we discard the read only when we are past its end AND indel at the end of the read (if any) was
                    // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
                    // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
                    it.remove(); // we've stepped off the end of the object
                }
            }
        }
    }

    // -----------------------------------------------------------------------------------------------------------------
    //
    // getting the list of reads
    //
    // -----------------------------------------------------------------------------------------------------------------

    /**
     * Transfer current list of all unique reads that have ever been used in any pileup, clearing old list
     *
     * This list is guaranteed to only contain unique reads, even across calls to this function. It is
     * literally the unique set of reads ever seen.
     *
     * The list occurs in the same order as they are encountered in the underlying iterator.
     *
     * Takes the maintained list of submitted reads, and transfers it to the caller of this
     * function. The old list is set to a new, cleanly allocated list so the caller officially
     * owns the list returned by this call. This is the only way to clear the tracking
     * of submitted reads, if enabled.
     *
     * The purpose of this function is allow users of LIBS to keep track of all of the reads pulled off the
     * underlying SAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for
     * any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads
     * used across all pileups. This is necessary for LIBS to handle because attempting to do
     * so from the pileups coming out of LIBS is extremely expensive.
     *
     * This functionality is only available if LIBS was created with the argument to track the reads
     *
     * @throws UnsupportedOperationException if called when keepingSubmittedReads is false
     *
     * @return the current list
     */
    @Ensures("result != null")
    public List<SAMRecord> transferReadsFromAllPreviousPileups() {
        return readStates.transferSubmittedReads();
    }

    /**
     * Get the underlying list of tracked reads. For testing only
     * @return a non-null list
     */
    @Ensures("result != null")
    protected List<SAMRecord> getReadsFromAllPreviousPileups() {
        return readStates.getSubmittedReads();
    }

    // -----------------------------------------------------------------------------------------------------------------
    //
    // utility functions
    //
    // -----------------------------------------------------------------------------------------------------------------

    /**
     * Generic place to put per-base filters appropriate to LocusIteratorByState
     *
     * @param rec the read under consideration
     * @param pos the genomic position of the pileup
     * @return true if the base at pos should be excluded from the pileup
     */
    private boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
        return ReadUtils.isBaseInsideAdaptor(rec, pos);
    }

    /**
     * Create a LIBSDownsamplingInfo object from the requested info in ReadProperties
     *
     * LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
     * downsampling to coverage by sample. SAMDataSource will have refrained from applying
     * any downsamplers to the read stream in this case, in the expectation that LIBS will
     * manage the downsampling. The reason for this is twofold: performance (don't have to
     * split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
     * of reads (eg., using half of a read, and throwing the rest away).
     *
     * @param readInfo GATK engine information about what should be done to the reads
     * @return a LIBS specific info holder about downsampling only
     */
    private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) {
        final boolean performDownsampling = readInfo.getDownsamplingMethod() != null &&
                readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
                readInfo.getDownsamplingMethod().toCoverage != null;
        final int coverage = performDownsampling ? readInfo.getDownsamplingMethod().toCoverage : 0;
        return new LIBSDownsamplingInfo(performDownsampling, coverage);
    }
}

View File

@ -1,351 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.locusiterator.old;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
import java.util.*;
/**
* Manages and updates mapping from sample -> List of SAMRecordAlignmentState
*
* Optionally can keep track of all of the reads pulled off the iterator and
* that appeared at any point in the list of SAMRecordAlignmentState for any reads.
* This functionality is only possible at this stage, as this object does the popping of
* reads off the underlying source iterator, and presents only a pileup-like interface
* of samples -> SAMRecordAlignmentStates. Reconstructing the unique set of reads
* used across all pileups is extremely expensive from that data structure.
*
* User: depristo
* Date: 1/5/13
* Time: 2:02 PM
*/
class ReadStateManager {
    private final List<String> samples;
    private final PeekableIterator<SAMRecord> iterator;
    private final SamplePartitioner samplePartitioner;

    // One state list per sample; every sample in `samples` gets an entry (possibly empty).
    private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();

    // All reads ever submitted; only populated when keepSubmittedReads is true.
    // Reassigned (not cleared) by transferSubmittedReads() so callers own the returned list.
    private LinkedList<SAMRecord> submittedReads;
    private final boolean keepSubmittedReads;

    // Running total of read states across all samples; updated by the nested
    // PerSampleReadStateManager as states are added, downsampled away, or removed.
    private int totalReadStates = 0;

    public ReadStateManager(final Iterator<SAMRecord> source,
                            final List<String> samples,
                            final LIBSDownsamplingInfo LIBSDownsamplingInfo,
                            final boolean keepSubmittedReads) {
        this.samples = samples;
        this.iterator = new PeekableIterator<SAMRecord>(source);
        this.keepSubmittedReads = keepSubmittedReads;
        this.submittedReads = new LinkedList<SAMRecord>();
        for (final String sample : samples) {
            readStatesBySample.put(sample, new PerSampleReadStateManager(LIBSDownsamplingInfo));
        }
        samplePartitioner = new SamplePartitioner(LIBSDownsamplingInfo, samples);
    }

    /**
     * Returns an iterator over all the reads associated with the given sample. Note that remove() is implemented
     * for this iterator; if present, total read states will be decremented.
     *
     * @param sample The sample.
     * @return Iterator over the reads associated with that sample.
     */
    public Iterator<SAMRecordAlignmentState> iterator(final String sample) {
        // The wrapper simply delegates; the per-sample iterator's own remove()
        // performs the thisSampleReadStates/totalReadStates bookkeeping.
        return new Iterator<SAMRecordAlignmentState>() {
            private Iterator<SAMRecordAlignmentState> wrappedIterator = readStatesBySample.get(sample).iterator();

            public boolean hasNext() {
                return wrappedIterator.hasNext();
            }

            public SAMRecordAlignmentState next() {
                return wrappedIterator.next();
            }

            public void remove() {
                wrappedIterator.remove();
            }
        };
    }

    public boolean isEmpty() {
        return totalReadStates == 0;
    }

    /**
     * Retrieves the total number of reads in the manager across all samples.
     *
     * @return Total number of reads over all samples.
     */
    public int size() {
        return totalReadStates;
    }

    /**
     * Retrieves the total number of reads in the manager in the given sample.
     *
     * @param sample The sample.
     * @return Total number of reads in the given sample.
     */
    public int size(final String sample) {
        return readStatesBySample.get(sample).size();
    }

    // Returns the first read state of the first sample that has any states,
    // or null if no sample has any (callers should check isEmpty() first).
    public SAMRecordAlignmentState getFirst() {
        for (final String sample : samples) {
            PerSampleReadStateManager reads = readStatesBySample.get(sample);
            if (!reads.isEmpty())
                return reads.peek();
        }
        return null;
    }

    public boolean hasNext() {
        return totalReadStates > 0 || iterator.hasNext();
    }

    // fast testing of position
    private boolean readIsPastCurrentPosition(SAMRecord read) {
        if (isEmpty())
            return false;
        else {
            SAMRecordAlignmentState state = getFirst();
            SAMRecord ourRead = state.getRead();
            return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
        }
    }

    /**
     * Pull reads off the underlying iterator until the next read is past the current
     * position, partition them by sample, and add the resulting states per sample.
     */
    public void collectPendingReads() {
        if (!iterator.hasNext())
            return;

        // the next record in the stream, peeked as to not remove it from the stream
        if ( isEmpty() ) {
            // No states yet: collect every read sharing the first read's contig and alignment start.
            final int firstContigIndex = iterator.peek().getReferenceIndex();
            final int firstAlignmentStart = iterator.peek().getAlignmentStart();
            while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
                submitRead(iterator.next());
            }
        } else {
            // Fast fail in the case that the read is past the current position.
            if (readIsPastCurrentPosition(iterator.peek()))
                return;

            while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
                submitRead(iterator.next());
            }
        }

        samplePartitioner.doneSubmittingReads();

        for (final String sample : samples) {
            Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
            PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
            addReadsToSample(statesBySample, newReads);
        }

        samplePartitioner.reset();
    }

    /**
     * Add a read to the sample partitioner, potentially adding it to all submitted reads, if appropriate
     * @param read a non-null read
     */
    @Requires("read != null")
    protected void submitRead(final SAMRecord read) {
        if ( keepSubmittedReads )
            submittedReads.add(read);
        samplePartitioner.submitRead(read);
    }

    /**
     * Transfer current list of submitted reads, clearing old list
     *
     * Takes the maintained list of submitted reads, and transfers it to the caller of this
     * function. The old list is set to a new, cleanly allocated list so the caller officially
     * owns the list returned by this call. This is the only way to clear the tracking
     * of submitted reads, if enabled.
     *
     * How to use this function:
     *
     * while ( doing some work unit, such as creating pileup at some locus ):
     *   interact with ReadStateManager in some way to make work unit
     *   readsUsedInPileup = transferSubmittedReads()
     *
     * @throws UnsupportedOperationException if called when keepSubmittedReads is false
     *
     * @return the current list of submitted reads
     */
    @Ensures({
            "result != null",
            "result != submittedReads" // result and previous submitted reads are not == objects
    })
    public List<SAMRecord> transferSubmittedReads() {
        if ( ! keepSubmittedReads ) throw new UnsupportedOperationException("cannot transferSubmittedReads if you aren't keeping them");

        final List<SAMRecord> prevSubmittedReads = submittedReads;
        this.submittedReads = new LinkedList<SAMRecord>();

        return prevSubmittedReads;
    }

    /**
     * Are we keeping submitted reads, or not?
     * @return true if we are keeping them, false otherwise
     */
    public boolean isKeepingSubmittedReads() {
        return keepSubmittedReads;
    }

    /**
     * Obtain a pointer to the list of submitted reads.
     *
     * This is not a copy of the list; it is shared with this ReadStateManager. It should
     * not be modified. Updates to this ReadStateManager may change the contents of the
     * list entirely.
     *
     * For testing purposes only.
     *
     * Will always be empty if we are are not keepSubmittedReads
     *
     * @return a non-null list of reads that have been submitted to this ReadStateManager
     */
    @Ensures({"result != null","keepSubmittedReads || result.isEmpty()"})
    protected List<SAMRecord> getSubmittedReads() {
        return submittedReads;
    }

    /**
     * Add reads with the given sample name to the given hanger entry.
     *
     * @param readStates The list of read states to add this collection of reads.
     * @param reads      Reads to add. Selected reads will be pulled from this source.
     */
    private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
        if (reads.isEmpty())
            return;

        Collection<SAMRecordAlignmentState> newReadStates = new LinkedList<SAMRecordAlignmentState>();

        for (SAMRecord read : reads) {
            SAMRecordAlignmentState state = new SAMRecordAlignmentState(read);
            // Advance once so the state is positioned on its first reference base.
            state.stepForwardOnGenome();
            newReadStates.add(state);
        }

        readStates.addStatesAtNextAlignmentStart(newReadStates);
    }

    // Per-sample container of read states, grouped into one LinkedList per distinct
    // alignment start, in start order. Non-static: mutates the outer totalReadStates.
    protected class PerSampleReadStateManager implements Iterable<SAMRecordAlignmentState> {
        private List<LinkedList<SAMRecordAlignmentState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordAlignmentState>>();
        private final Downsampler<LinkedList<SAMRecordAlignmentState>> levelingDownsampler;
        private int thisSampleReadStates = 0;

        public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
            // Downsampler is only instantiated when per-sample coverage downsampling was requested.
            this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
                    ? new LevelingDownsampler<LinkedList<SAMRecordAlignmentState>, SAMRecordAlignmentState>(LIBSDownsamplingInfo.getToCoverage())
                    : null;
        }

        public void addStatesAtNextAlignmentStart(Collection<SAMRecordAlignmentState> states) {
            if ( states.isEmpty() ) {
                return;
            }

            readStatesByAlignmentStart.add(new LinkedList<SAMRecordAlignmentState>(states));
            thisSampleReadStates += states.size();
            totalReadStates += states.size();

            if ( levelingDownsampler != null ) {
                // Run the leveling downsampler over the whole per-start structure, then
                // subtract whatever it discarded from both sample-local and global counts.
                levelingDownsampler.submit(readStatesByAlignmentStart);
                levelingDownsampler.signalEndOfInput();

                thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
                totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();

                // use returned List directly rather than make a copy, for efficiency's sake
                readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
                levelingDownsampler.reset();
            }
        }

        public boolean isEmpty() {
            return readStatesByAlignmentStart.isEmpty();
        }

        public SAMRecordAlignmentState peek() {
            return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
        }

        public int size() {
            return thisSampleReadStates;
        }

        // Iterates over all states, flattening the per-alignment-start lists in order.
        // remove() keeps the sample-local and global counters in sync and drops
        // emptied per-start lists.
        public Iterator<SAMRecordAlignmentState> iterator() {
            return new Iterator<SAMRecordAlignmentState>() {
                private Iterator<LinkedList<SAMRecordAlignmentState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
                private LinkedList<SAMRecordAlignmentState> currentPositionReadStates = null;
                private Iterator<SAMRecordAlignmentState> currentPositionReadStatesIterator = null;

                public boolean hasNext() {
                    return alignmentStartIterator.hasNext() ||
                            (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
                }

                public SAMRecordAlignmentState next() {
                    if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
                        currentPositionReadStates = alignmentStartIterator.next();
                        currentPositionReadStatesIterator = currentPositionReadStates.iterator();
                    }

                    return currentPositionReadStatesIterator.next();
                }

                public void remove() {
                    currentPositionReadStatesIterator.remove();
                    thisSampleReadStates--;
                    totalReadStates--;

                    if ( currentPositionReadStates.isEmpty() ) {
                        alignmentStartIterator.remove();
                    }
                }
            };
        }
    }
}

View File

@ -1,205 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.locusiterator.old;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
/**
* Steps a single read along its alignment to the genome
*
* The logical model for generating extended events is as follows: the "record state"
* implements the traversal along the reference; thus stepForwardOnGenome() returns
* on every and only on actual reference bases. This can be a (mis)match or a deletion
* (in the latter case, we still return on every individual reference base the deletion spans).
* In the extended events mode, the record state also remembers if there was an insertion, or
* if the deletion just started *right before* the current reference base the record state is
* pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome()
* will clear that memory (as we remember only extended events immediately preceding
* the current reference base).
*
* User: depristo
* Date: 1/5/13
* Time: 1:08 PM
*/
public class SAMRecordAlignmentState {
// TODO -- one idea to clean up this functionality:
// TODO --
// TODO -- split functionality here into an alignment state machine and an
// TODO -- alignment state. The alignment state simply carries with it the
// TODO -- state of the alignment (the current cigar op, the genome offset,
// TODO -- the read offset, etc. The AlignmentStateMachine produces these
// TODO -- states, and has operations such stepForwardOnGenome, getLastState(),
// TODO -- getCurrentState(), getNextState();
/**
* Our read
*/
private final SAMRecord read;
private final Cigar cigar;
private final int nCigarElements;
/**
* how far are we offset from the start of the read bases?
*/
int readOffset = -1;
/**
* how far are we offset from the alignment start on the genome?
*/
int genomeOffset = -1;
int cigarOffset = -1;
CigarElement curElement = null;
/**
* how far are we into a single cigarElement?
*/
int cigarElementCounter = -1;
/**
 * Create a new alignment state machine for the given read.
 *
 * @param read the read to traverse; must not be null
 */
@Requires("read != null")
// TODO -- should enforce contracts like the read is aligned, etc
public SAMRecordAlignmentState(final SAMRecord read) {
    this.read = read;
    // Cache the cigar and its element count; both are consulted repeatedly while stepping.
    this.cigar = read.getCigar();
    this.nCigarElements = cigar.numCigarElements();
}
/** @return the read this state machine is traversing */
public SAMRecord getRead() {
    return read;
}
/**
 * What is our current offset in the read's bases that aligns us with the reference genome?
 *
 * -1 indicates that stepForwardOnGenome() has not yet been called.
 *
 * @return the current read offset position
 */
public int getReadOffset() {
    return readOffset;
}
/**
 * What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
 *
 * -1 indicates that stepForwardOnGenome() has not yet been called.
 *
 * @return the current offset
 */
public int getGenomeOffset() {
    return genomeOffset;
}
/** @return the 1-based genomic coordinate of the current state (alignment start + genome offset) */
public int getGenomePosition() {
    return read.getAlignmentStart() + getGenomeOffset();
}
/**
 * @param genomeLocParser parser used to build the location
 * @return a single-base GenomeLoc for the current genomic position of this state
 */
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
    return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
}
/**
 * @return the operator of the cigar element currently being traversed.
 *         NOTE(review): curElement is null until stepForwardOnGenome() has been
 *         called at least once, so calling this earlier throws NPE — confirm callers.
 */
public CigarOperator getCurrentCigarOperator() {
    return curElement.getOperator();
}
/** Debug representation: read name plus read/genome/cigar offsets and the current cigar element. */
public String toString() {
    return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
}
/**
 * Peek at the cigar element following the current one.
 *
 * Returns the next cigar element only when we are positioned on the last base of the
 * current element (cigarElementCounter + 1 > length) and a following element exists;
 * otherwise returns the current element itself.
 */
public CigarElement peekForwardOnGenome() {
    return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
}
/**
 * Which cigar element did the previous position fall in?
 *
 * Returns the previous cigar element when we sit on the first base of the
 * current element and an earlier element exists; otherwise the current element.
 *
 * NOTE(review): the guard is {@code cigarOffset - 1 > 0}, so element index 0
 * is never returned even when we are at the start of element 1. If peeking
 * back onto the first cigar element should be possible, this looks like an
 * off-by-one ({@code >= 0}) — confirm the intended behavior before changing.
 *
 * @return the previous cigar element, or the current one in the boundary cases above
 */
public CigarElement peekBackwardOnGenome() {
    final boolean atStartOfElement = cigarElementCounter - 1 == 0;
    final boolean hasEarlierElement = cigarOffset - 1 > 0;
    if (atStartOfElement && hasEarlierElement) {
        return cigar.getCigarElement(cigarOffset - 1);
    }
    return curElement;
}
/**
 * Advance the state machine by exactly one position on the reference genome.
 *
 * M/EQ/X consume both read and reference; D/N consume reference only; I/S
 * consume read only; H/P consume neither. Elements that consume no reference
 * are skipped over internally (via recursion) so every call nets exactly one
 * reference step, until the alignment is exhausted.
 *
 * @return the cigar operator covering the new reference position, or null once
 *         we have stepped past the end of the read's alignment
 * @throws UserException.MalformedBAM if the read's cigar starts or ends with a deletion
 */
public CigarOperator stepForwardOnGenome() {
    // we enter this method with readOffset = index of the last processed base on the read
    // (-1 if we did not process a single base yet); this can be last matching base,
    // or last base of an insertion
    if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
        // the current cigar element is exhausted (or we haven't started): move to the next one
        cigarOffset++;
        if (cigarOffset < nCigarElements) {
            curElement = cigar.getCigarElement(cigarOffset);
            cigarElementCounter = 0;
            // next line: guards against cigar elements of length 0; when new cigar element is retrieved,
            // we reenter in order to re-check cigarElementCounter against curElement's length
            return stepForwardOnGenome();
        } else {
            if (curElement != null && curElement.getOperator() == CigarOperator.D)
                throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
            // Reads that contain indels model the genomeOffset as the following base in the reference. Because
            // we fall into this else block only when indels end the read, increment genomeOffset such that the
            // current offset of this read is the next ref base after the end of the indel. This position will
            // model a point on the reference somewhere after the end of the read.
            genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
            // we do step forward on the ref, and by returning null we also indicate that we are past the read end.
            return null;
        }
    }
    boolean done = false;
    switch (curElement.getOperator()) {
        case H: // ignore hard clips
        case P: // ignore pads
            cigarElementCounter = curElement.getLength();
            break;
        case I: // insertion w.r.t. the reference
        case S: // soft clip
            // consume the whole element's worth of read bases, no reference movement
            cigarElementCounter = curElement.getLength();
            readOffset += curElement.getLength();
            break;
        case D: // deletion w.r.t. the reference
            if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
                throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
            // should be the same as N case
            genomeOffset++;
            done = true;
            break;
        case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
            genomeOffset++;
            done = true;
            break;
        case M:
        case EQ:
        case X:
            // aligned base: consume one read base and one reference base
            readOffset++;
            genomeOffset++;
            done = true;
            break;
        default:
            throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
    }
    // if this element consumed no reference (H/P/I/S), keep stepping until one does
    return done ? curElement.getOperator() : stepForwardOnGenome();
}
}

View File

@ -1,82 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.locusiterator.old;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
import org.broadinstitute.sting.gatk.downsampling.PassThroughDownsampler;
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
import java.util.*;
/**
 * Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
 *
 * Note: stores reads by sample ID string, not by sample object
 */
class SamplePartitioner {
    // One downsampler per sample name; null is a legal key, used for reads
    // that carry no read group (see submitRead()).
    private Map<String, Downsampler<SAMRecord>> readsBySample;
    /**
     * Build a partitioner with one (possibly pass-through) downsampler per sample.
     *
     * @param LIBSDownsamplingInfo controls whether and to what coverage reads are downsampled
     * @param samples every sample name we expect to see; reads from any other sample are dropped
     */
    public SamplePartitioner(final LIBSDownsamplingInfo LIBSDownsamplingInfo, final List<String> samples) {
        readsBySample = new HashMap<String, Downsampler<SAMRecord>>(samples.size());
        for ( String sample : samples ) {
            readsBySample.put(sample, createDownsampler(LIBSDownsamplingInfo));
        }
    }
    /** A reservoir downsampler when downsampling was requested, otherwise a pass-through one. */
    private Downsampler<SAMRecord> createDownsampler(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
        return LIBSDownsamplingInfo.isPerformDownsampling()
            ? new ReservoirDownsampler<SAMRecord>(LIBSDownsamplingInfo.getToCoverage())
            : new PassThroughDownsampler<SAMRecord>();
    }
    /**
     * Route a read to its sample's downsampler, keyed by the read group's sample
     * name (null when the read has no read group). Reads whose sample was not
     * declared at construction are silently ignored.
     */
    public void submitRead(SAMRecord read) {
        String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
        if (readsBySample.containsKey(sampleName))
            readsBySample.get(sampleName).submit(read);
    }
    /** Signal end-of-input to every per-sample downsampler so pending reads are finalized. */
    public void doneSubmittingReads() {
        for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
            perSampleReads.getValue().signalEndOfInput();
        }
    }
    /**
     * Consume and return the finalized reads for the given sample.
     *
     * @throws NoSuchElementException if sampleName was not declared at construction
     */
    public Collection<SAMRecord> getReadsForSample(String sampleName) {
        if ( ! readsBySample.containsKey(sampleName) )
            throw new NoSuchElementException("Sample name not found");
        return readsBySample.get(sampleName).consumeFinalizedItems();
    }
    /** Clear and reset every per-sample downsampler so this partitioner can be reused. */
    public void reset() {
        for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
            perSampleReads.getValue().clear();
            perSampleReads.getValue().reset();
        }
    }
}

View File

@ -67,32 +67,32 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
@Param
private Downsampling downsampling;
public void timeDownsampling(int reps) {
for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile);
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
reader.getFileHeader(),
SAMFileHeader.SortOrder.coordinate,
false,
SAMFileReader.ValidationStringency.SILENT,
downsampling.create(),
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
Collections.<ReadFilter>emptyList(),
Collections.<ReadTransformer>emptyList(),
false,
(byte)0,
false);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
while(locusIteratorByState.hasNext()) {
locusIteratorByState.next().getLocation();
}
reader.close();
}
}
// public void timeDownsampling(int reps) {
// for(int i = 0; i < reps; i++) {
// SAMFileReader reader = new SAMFileReader(inputFile);
// ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
// reader.getFileHeader(),
// SAMFileHeader.SortOrder.coordinate,
// false,
// SAMFileReader.ValidationStringency.SILENT,
// downsampling.create(),
// new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
// Collections.<ReadFilter>emptyList(),
// Collections.<ReadTransformer>emptyList(),
// false,
// (byte)0,
// false);
//
// GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
// // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
// Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
// LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
// while(locusIteratorByState.hasNext()) {
// locusIteratorByState.next().getLocation();
// }
// reader.close();
// }
// }
private enum Downsampling {
NONE {

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -78,24 +77,24 @@ public class AlignmentStateMachinePerformance {
}
}
break;
case OLD_STATE:
{
final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
alignmentStateMachine.getRead();
nIterations++;
}
}
break;
// case OLD_STATE:
// {
// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
// alignmentStateMachine.getRead();
// nIterations++;
// }
// }
// break;
case NEW_LIBS:
{
final List<SAMRecord> reads = Collections.nCopies(30, (SAMRecord)read);
final List<SAMRecord> reads = Collections.nCopies(30, (SAMRecord) read);
final org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState libs =
new org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState(
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
LocusIteratorByStateBaseTest.createTestReadProperties(),
genomeLocParser,
LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups());
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
while ( libs.hasNext() ) {
AlignmentContext context = libs.next();

View File

@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -71,14 +70,29 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
}
}
public void timeOriginalLIBS(int rep) {
// public void timeOriginalLIBS(int rep) {
// for ( int i = 0; i < rep; i++ ) {
// final org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState libs =
// new org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState(
// new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
// LocusIteratorByStateBaseTest.createTestReadProperties(),
// genomeLocParser,
// LocusIteratorByState.sampleListForSAMWithoutReadGroups());
//
// while ( libs.hasNext() ) {
// AlignmentContext context = libs.next();
// }
// }
// }
public void timeLegacyLIBS(int rep) {
for ( int i = 0; i < rep; i++ ) {
final org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState libs =
new org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState(
final org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState libs =
new org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState(
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
LocusIteratorByStateBaseTest.createTestReadProperties(),
genomeLocParser,
LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups());
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
while ( libs.hasNext() ) {
AlignmentContext context = libs.next();
@ -93,7 +107,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
LocusIteratorByStateBaseTest.createTestReadProperties(),
genomeLocParser,
LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups());
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
while ( libs.hasNext() ) {
AlignmentContext context = libs.next();
@ -101,16 +115,16 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
}
}
public void timeOriginalLIBSStateMachine(int rep) {
for ( int i = 0; i < rep; i++ ) {
for ( final SAMRecord read : reads ) {
final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
alignmentStateMachine.getGenomeOffset();
}
}
}
}
// public void timeOriginalLIBSStateMachine(int rep) {
// for ( int i = 0; i < rep; i++ ) {
// for ( final SAMRecord read : reads ) {
// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
// alignmentStateMachine.getGenomeOffset();
// }
// }
// }
// }
public void timeAlignmentStateMachine(int rep) {
for ( int i = 0; i < rep; i++ ) {

View File

@ -57,22 +57,12 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
/**
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
* for the system.
*/
public static List<String> sampleListForSAMWithoutReadGroups() {
List<String> samples = new ArrayList<String>();
samples.add(null);
return samples;
}
protected LocusIteratorByState makeLTBS(List<SAMRecord> reads,
ReadProperties readAttributes) {
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
readAttributes,
genomeLocParser,
sampleListForSAMWithoutReadGroups());
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public static ReadProperties createTestReadProperties() {

View File

@ -27,9 +27,6 @@ package org.broadinstitute.sting.utils.locusiterator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
@ -65,7 +62,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
}
public void run() {
final List<String> samples = sampleListForSAMWithoutReadGroups();
final List<String> samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups();
final Iterator<SAMRecord> iterator = new LinkedList<SAMRecord>().iterator();
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false);
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);

View File

@ -1,463 +0,0 @@
//package org.broadinstitute.sting.utils.locusiterator.old;
//
//import net.sf.samtools.*;
//import org.broadinstitute.sting.gatk.ReadProperties;
//import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
//import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
//import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
//import org.broadinstitute.sting.utils.NGSPlatform;
//import org.broadinstitute.sting.utils.Utils;
//import org.broadinstitute.sting.utils.locusiterator.LIBS_position;
//import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
//import org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState;
//import org.broadinstitute.sting.utils.pileup.PileupElement;
//import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
//import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
//import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
//import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
//import org.testng.Assert;
//import org.testng.annotations.DataProvider;
//import org.testng.annotations.Test;
//
//import java.util.*;
//
///**
// * testing of the new (non-legacy) version of LocusIteratorByState
// */
//public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
//
// // TODO -- REMOVE ME WHEN LIBS IS FIXED
// // TODO -- CURRENT CODE DOESN'T CORRECTLY COMPUTE THINGS LIKE BEFORE DELETION, AFTER INSERTION, ETC
// private final static boolean ALLOW_BROKEN_LIBS_STATE = true;
//
// protected LocusIteratorByState li;
//
// @Test
// public void testXandEQOperators() {
// final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
//
// // create a test version of the Reads object
// ReadProperties readAttributes = createTestReadProperties();
//
// SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
// r1.setReadBases(bases1);
// r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
// r1.setCigarString("10M");
//
// SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
// r2.setReadBases(bases2);
// r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
// r2.setCigarString("3=1X5=1X");
//
// SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
// r3.setReadBases(bases2);
// r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
// r3.setCigarString("3=1X5M1X");
//
// SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
// r4.setReadBases(bases2);
// r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
// r4.setCigarString("10M");
//
// List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
//
// // create the iterator by state with the fake reads and fake records
// li = makeLTBS(reads,readAttributes);
//
// while (li.hasNext()) {
// AlignmentContext context = li.next();
// ReadBackedPileup pileup = context.getBasePileup();
// Assert.assertEquals(pileup.depthOfCoverage(), 4);
// }
// }
//
// @Test
// public void testIndelsInRegularPileup() {
// final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
//
// // create a test version of the Reads object
// ReadProperties readAttributes = createTestReadProperties();
//
// SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
// before.setReadBases(bases);
// before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
// before.setCigarString("10M");
//
// SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
// during.setReadBases(indelBases);
// during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
// during.setCigarString("4M2I6M");
//
// SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
// after.setReadBases(bases);
// after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
// after.setCigarString("10M");
//
// List<SAMRecord> reads = Arrays.asList(before, during, after);
//
// // create the iterator by state with the fake reads and fake records
// li = makeLTBS(reads,readAttributes);
//
// boolean foundIndel = false;
// while (li.hasNext()) {
// AlignmentContext context = li.next();
// ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
// for (PileupElement p : pileup) {
// if (p.isBeforeInsertion()) {
// foundIndel = true;
// Assert.assertEquals(p.getLengthOfImmediatelyFollowingIndel(), 2, "Wrong event length");
// Assert.assertEquals(p.getBasesOfImmediatelyFollowingInsertion(), "CT", "Inserted bases are incorrect");
// break;
// }
// }
//
// }
//
// Assert.assertTrue(foundIndel,"Indel in pileup not found");
// }
//
// @Test
// public void testWholeIndelReadInIsolation() {
// final int firstLocus = 44367789;
//
// // create a test version of the Reads object
// ReadProperties readAttributes = createTestReadProperties();
//
// SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
// indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
// indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
// indelOnlyRead.setCigarString("76I");
//
// List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
//
// // create the iterator by state with the fake reads and fake records
// li = makeLTBS(reads, readAttributes);
//
// // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
// // and considers it to be an indel-containing read.
// Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
// AlignmentContext alignmentContext = li.next();
// Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
// ReadBackedPileup basePileup = alignmentContext.getBasePileup();
// Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
// Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
// }
//
// /**
// * Test to make sure that reads supporting only an indel (example cigar string: 76I) do
// * not negatively influence the ordering of the pileup.
// */
// @Test
// public void testWholeIndelRead() {
// final int firstLocus = 44367788, secondLocus = firstLocus + 1;
//
// SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
// leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
// leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
// leadingRead.setCigarString("1M75I");
//
// SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
// indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
// indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
// indelOnlyRead.setCigarString("76I");
//
// SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
// fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
// fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
// fullMatchAfterIndel.setCigarString("75I1M");
//
// List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
//
// // create the iterator by state with the fake reads and fake records
// li = makeLTBS(reads, createTestReadProperties());
// int currentLocus = firstLocus;
// int numAlignmentContextsFound = 0;
//
// while(li.hasNext()) {
// AlignmentContext alignmentContext = li.next();
// Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
//
// if(currentLocus == firstLocus) {
// List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
// Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
// Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
// }
// else if(currentLocus == secondLocus) {
// List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
// Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
// Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
// Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
// }
//
// currentLocus++;
// numAlignmentContextsFound++;
// }
//
// Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
// }
//
// /**
// * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
// */
// @Test
// public void testWholeIndelReadRepresentedTest() {
// final int firstLocus = 44367788, secondLocus = firstLocus + 1;
//
// SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
// read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
// read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
// read1.setCigarString("1I");
//
// List<SAMRecord> reads = Arrays.asList(read1);
//
// // create the iterator by state with the fake reads and fake records
// li = makeLTBS(reads, createTestReadProperties());
//
// while(li.hasNext()) {
// AlignmentContext alignmentContext = li.next();
// ReadBackedPileup p = alignmentContext.getBasePileup();
// Assert.assertTrue(p.getNumberOfElements() == 1);
// PileupElement pe = p.iterator().next();
// Assert.assertTrue(pe.isBeforeInsertion());
// Assert.assertFalse(pe.isAfterInsertion());
// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A");
// }
//
// SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
// read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
// read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
// read2.setCigarString("10I");
//
// reads = Arrays.asList(read2);
//
// // create the iterator by state with the fake reads and fake records
// li = makeLTBS(reads, createTestReadProperties());
//
// while(li.hasNext()) {
// AlignmentContext alignmentContext = li.next();
// ReadBackedPileup p = alignmentContext.getBasePileup();
// Assert.assertTrue(p.getNumberOfElements() == 1);
// PileupElement pe = p.iterator().next();
// Assert.assertTrue(pe.isBeforeInsertion());
// Assert.assertFalse(pe.isAfterInsertion());
// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "AAAAAAAAAA");
// }
// }
//
// ////////////////////////////////////////////
// // comprehensive LIBS/PileupElement tests //
// ////////////////////////////////////////////
//
// @DataProvider(name = "LIBSTest")
// public Object[][] makeLIBSTest() {
// final List<Object[]> tests = new LinkedList<Object[]>();
//
// tests.add(new Object[]{new LIBSTest("1I", 1)});
// tests.add(new Object[]{new LIBSTest("10I", 10)});
// tests.add(new Object[]{new LIBSTest("2M2I2M", 6)});
// tests.add(new Object[]{new LIBSTest("2M2I", 4)});
// //TODO -- uncomment these when LIBS is fixed
// //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
// //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
// //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
// //{new LIBSTest("1M2D2M", 3)},
// tests.add(new Object[]{new LIBSTest("1S1M", 2)});
// tests.add(new Object[]{new LIBSTest("1M1S", 2)});
// tests.add(new Object[]{new LIBSTest("1S1M1I", 3)});
//
// return tests.toArray(new Object[][]{});
//
// // TODO -- enable combinatorial tests here when LIBS is fixed
//// return createLIBSTests(
//// Arrays.asList(1, 10),
//// Arrays.asList(1, 2, 3));
// }
//
// @Test(dataProvider = "LIBSTest")
// public void testLIBS(LIBSTest params) {
// if ( params.getElements() == null || params.getElements().get(0).getOperator() == CigarOperator.I )
// // TODO -- ENABLE ME WHEN LIBS IS FIXED
// return;
//
// // create the iterator by state with the fake reads and fake records
// final GATKSAMRecord read = params.makeRead();
// li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
// final LIBS_position tester = new LIBS_position(read);
//
// int bpVisited = 0;
// while ( li.hasNext() ) {
// bpVisited++;
//
// AlignmentContext alignmentContext = li.next();
// ReadBackedPileup p = alignmentContext.getBasePileup();
// Assert.assertTrue(p.getNumberOfElements() == 1);
// PileupElement pe = p.iterator().next();
//
// tester.stepForwardOnGenome();
//
// if ( ! ALLOW_BROKEN_LIBS_STATE ) {
// Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase);
// Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
// Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase);
// Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
// Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
// Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
// Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
// }
//
// Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset());
// }
//
// // min is one because always visit something, even for 10I reads
// final int expectedBpToVisit = Math.max(read.getAlignmentEnd() - read.getAlignmentStart() + 1, 1);
// Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
// }
//
// // ------------------------------------------------------------
// //
// // Tests for keeping reads
// //
// // ------------------------------------------------------------
//
// @DataProvider(name = "LIBSKeepSubmittedReads")
// public Object[][] makeLIBSKeepSubmittedReads() {
// final List<Object[]> tests = new LinkedList<Object[]>();
//
// for ( final boolean doSampling : Arrays.asList(true, false) ) {
// for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
// for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
// for ( final int nSamples : Arrays.asList(1, 2, 10) ) {
// for ( final boolean keepReads : Arrays.asList(true, false) ) {
// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
//// for ( final int nReadsPerLocus : Arrays.asList(1) ) {
//// for ( final int nLoci : Arrays.asList(10) ) {
//// for ( final int nSamples : Arrays.asList(1) ) {
//// for ( final boolean keepReads : Arrays.asList(true) ) {
//// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) {
// tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling});
// }
// }
// }
// }
// }
// }
//
// return tests.toArray(new Object[][]{});
// }
//
// @Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads")
// public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
// final int nLoci,
// final int nSamples,
// final boolean keepReads,
// final boolean grabReadsAfterEachCycle,
// final boolean downsample) {
// logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample));
// final int readLength = 10;
//
// final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
// final List<String> samples = new ArrayList<String>(nSamples);
// for ( int i = 0; i < nSamples; i++ ) {
// final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg" + i);
// final String sample = "sample" + i;
// samples.add(sample);
// rg.setSample(sample);
// rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
// header.addReadGroup(rg);
// }
//
// final int maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2;
// final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1);
// final DownsamplingMethod downsampler = downsample
// ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false)
// : new DownsamplingMethod(DownsampleType.NONE, null, null, false);
// final List<SAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
// li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
// createTestReadProperties(downsampler, keepReads),
// genomeLocParser,
// samples);
//
// final Set<SAMRecord> seenSoFar = new HashSet<SAMRecord>();
// final Set<SAMRecord> keptReads = new HashSet<SAMRecord>();
// int bpVisited = 0;
// while ( li.hasNext() ) {
// bpVisited++;
// final AlignmentContext alignmentContext = li.next();
// final ReadBackedPileup p = alignmentContext.getBasePileup();
//
// if ( downsample ) {
// // just not a safe test
// //Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
// } else {
// final int minPileupSize = nReadsPerLocus * nSamples;
// Assert.assertTrue(p.getNumberOfElements() >= minPileupSize);
// }
//
// seenSoFar.addAll(p.getReads());
// if ( keepReads && grabReadsAfterEachCycle ) {
// final List<SAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
//
// // the number of reads starting here
// int nReadsStartingHere = 0;
// for ( final SAMRecord read : p.getReads() )
// if ( read.getAlignmentStart() == alignmentContext.getPosition() )
// nReadsStartingHere++;
//
// if ( downsample )
// // with downsampling we might have some reads here that were downsampled away
// // in the pileup
// Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
// else
// Assert.assertEquals(locusReads.size(), nReadsStartingHere);
// keptReads.addAll(locusReads);
//
// // check that all reads we've seen so far are in our keptReads
// for ( final SAMRecord read : seenSoFar ) {
// Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
// }
// }
//
// if ( ! keepReads )
// Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty");
// }
//
// if ( keepReads && ! grabReadsAfterEachCycle )
// keptReads.addAll(li.transferReadsFromAllPreviousPileups());
//
// if ( ! downsample ) { // downsampling may drop loci
// final int expectedBpToVisit = nLoci + readLength - 1;
// Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
// }
//
// if ( keepReads ) {
// // check we have the right number of reads
// final int totalReads = nLoci * nReadsPerLocus * nSamples;
// if ( ! downsample ) { // downsampling may drop reads
// Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal");
//
// // check that the order of reads is the same as in our read list
// for ( int i = 0; i < reads.size(); i++ ) {
// final SAMRecord inputRead = reads.get(i);
//                final SAMRecord keptRead = reads.get(i); // FIXME: compares reads.get(i) to itself — vacuously true; keptReads is a HashSet, so the intended "same order" check was never actually possible
// Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i);
// }
// } else {
// Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal");
// }
//
// // check uniqueness
// final Set<String> readNames = new HashSet<String>();
// for ( final SAMRecord read : keptReads ) {
// Assert.assertFalse(readNames.contains(read.getReadName()), "Found duplicate reads in the kept reads");
// readNames.add(read.getReadName());
// }
//
// // check that all reads we've seen are in our keptReads
// for ( final SAMRecord read : seenSoFar ) {
// Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
// }
// }
// }
//}

// ---- commit-page artifact (file boundary): the deleted SAMRecordAlignmentStateUnitTest.java follows ----
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.locusiterator.old;
import org.broadinstitute.sting.utils.locusiterator.LIBS_position;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* testing of the new (non-legacy) version of LocusIteratorByState
*/
public class SAMRecordAlignmentStateUnitTest extends LocusIteratorByStateBaseTest {
@DataProvider(name = "AlignmentStateTest")
public Object[][] makeAlignmentStateTest() {
// return new Object[][]{{new LIBSTest("1I", 1)}};
return createLIBSTests(
Arrays.asList(1, 2),
Arrays.asList(1, 2, 3, 4));
}
@Test(dataProvider = "AlignmentStateTest")
public void testAlignmentStateTest(LIBSTest params) {
final GATKSAMRecord read = params.makeRead();
final SAMRecordAlignmentState state = new SAMRecordAlignmentState(read);
final LIBS_position tester = new LIBS_position(read);
Assert.assertSame(state.getRead(), read);
Assert.assertNotNull(state.toString());
int bpVisited = 0;
int lastOffset = -1;
while ( state.stepForwardOnGenome() != null ) {
bpVisited++;
tester.stepForwardOnGenome();
Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset());
Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
// TODO -- state.peekBackwardOnGenome();
// TODO -- state.peekForwardOnGenome();
// TODO -- state.getCurrentCigarOperator()
// TODO -- state.getGenomeOffset();
// TODO -- state.getGenomePosition();
// TODO -- Assert.assertEquals(state.getLocation(genomeLocParser), EXPECTATION);
lastOffset = state.getReadOffset();
}
// min is one because always visit something, even for 10I reads
final int expectedBpToVisit = read.getAlignmentEnd() - read.getAlignmentStart() + 1;
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
}
}