Refactor LIBS into component parts, expand unit tests, some code cleanup
-- Split out all of the inner classes of LIBS into separate independent classes -- Split / add unit tests for many of these components. -- Radically expand unit tests for SAMRecordAlignmentState (the lowest level piece of code) making sure at least some of it works -- No need to change unit tests or integration tests. No change in functionality. -- Added (currently disabled) code to track all submitted reads to LIBS, but this isn't accessible or tested
This commit is contained in:
parent
2e5d38fd0e
commit
b3ecfbfce8
|
|
@ -29,7 +29,7 @@ import net.sf.picard.util.PeekableIterator;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIterator;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 1/5/13
|
||||
* Time: 1:26 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class LIBSDownsamplingInfo {
|
||||
public final static LIBSDownsamplingInfo NO_DOWNSAMPLING = new LIBSDownsamplingInfo(false, -1);
|
||||
|
||||
final private boolean performDownsampling;
|
||||
final private int toCoverage;
|
||||
|
||||
LIBSDownsamplingInfo(boolean performDownsampling, int toCoverage) {
|
||||
this.performDownsampling = performDownsampling;
|
||||
this.toCoverage = toCoverage;
|
||||
}
|
||||
|
||||
public boolean isPerformDownsampling() {
|
||||
return performDownsampling;
|
||||
}
|
||||
|
||||
public int getToCoverage() {
|
||||
return toCoverage;
|
||||
}
|
||||
}
|
||||
|
|
@ -25,8 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -36,7 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.downsampling.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -65,152 +63,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
private final GenomeLocParser genomeLocParser;
|
||||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
private final boolean keepSubmittedReads;
|
||||
private final boolean includeReadsWithDeletionAtLoci;
|
||||
|
||||
protected static class SAMRecordState {
|
||||
SAMRecord read;
|
||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
||||
|
||||
Cigar cigar = null;
|
||||
int cigarOffset = -1;
|
||||
CigarElement curElement = null;
|
||||
int nCigarElements = 0;
|
||||
|
||||
int cigarElementCounter = -1; // how far are we into a single cigarElement
|
||||
|
||||
// The logical model for generating extended events is as follows: the "record state" implements the traversal
|
||||
// along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
|
||||
// can be a (mis)match or a deletion (in the latter case, we still return on every individual reference base the
|
||||
// deletion spans). In the extended events mode, the record state also remembers if there was an insertion, or
|
||||
// if the deletion just started *right before* the current reference base the record state is pointing to upon the return from
|
||||
// stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
|
||||
// events immediately preceding the current reference base).
|
||||
|
||||
public SAMRecordState(SAMRecord read) {
|
||||
this.read = read;
|
||||
cigar = read.getCigar();
|
||||
nCigarElements = cigar.numCigarElements();
|
||||
|
||||
//System.out.printf("Creating a SAMRecordState: %s%n", this);
|
||||
}
|
||||
|
||||
public SAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is our current offset in the read's bases that aligns us with the reference genome?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getReadOffset() {
|
||||
return readOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getGenomeOffset() {
|
||||
return genomeOffset;
|
||||
}
|
||||
|
||||
public int getGenomePosition() {
|
||||
return read.getAlignmentStart() + getGenomeOffset();
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
|
||||
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||
}
|
||||
|
||||
public CigarOperator getCurrentCigarOperator() {
|
||||
return curElement.getOperator();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||
}
|
||||
|
||||
public CigarElement peekForwardOnGenome() {
|
||||
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
|
||||
}
|
||||
|
||||
public CigarElement peekBackwardOnGenome() {
|
||||
return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
|
||||
}
|
||||
|
||||
|
||||
public CigarOperator stepForwardOnGenome() {
|
||||
// we enter this method with readOffset = index of the last processed base on the read
|
||||
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
|
||||
|
||||
|
||||
if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
|
||||
cigarOffset++;
|
||||
if (cigarOffset < nCigarElements) {
|
||||
curElement = cigar.getCigarElement(cigarOffset);
|
||||
cigarElementCounter = 0;
|
||||
// next line: guards against cigar elements of length 0; when new cigar element is retrieved,
|
||||
// we reenter in order to re-check cigarElementCounter against curElement's length
|
||||
return stepForwardOnGenome();
|
||||
} else {
|
||||
if (curElement != null && curElement.getOperator() == CigarOperator.D)
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
|
||||
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
||||
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
||||
// current offset of this read is the next ref base after the end of the indel. This position will
|
||||
// model a point on the reference somewhere after the end of the read.
|
||||
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
boolean done = false;
|
||||
switch (curElement.getOperator()) {
|
||||
case H: // ignore hard clips
|
||||
case P: // ignore pads
|
||||
cigarElementCounter = curElement.getLength();
|
||||
break;
|
||||
case I: // insertion w.r.t. the reference
|
||||
case S: // soft clip
|
||||
cigarElementCounter = curElement.getLength();
|
||||
readOffset += curElement.getLength();
|
||||
break;
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
// should be the same as N case
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
readOffset++;
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
return done ? curElement.getOperator() : stepForwardOnGenome();
|
||||
}
|
||||
}
|
||||
|
||||
//final boolean DEBUG = false;
|
||||
//final boolean DEBUG2 = false && DEBUG;
|
||||
private ReadProperties readInfo;
|
||||
private AlignmentContext nextAlignmentContext;
|
||||
private boolean performDownsampling;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -218,22 +74,27 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
|
||||
this.readInfo = readInformation;
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
final ReadProperties readInformation,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final Collection<String> samples) {
|
||||
this(samIterator,
|
||||
toDownsamplingInfo(readInformation),
|
||||
readInformation.includeReadsWithDeletionAtLoci(),
|
||||
genomeLocParser,
|
||||
samples);
|
||||
}
|
||||
|
||||
protected LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
final LIBSDownsamplingInfo downsamplingInfo,
|
||||
final boolean includeReadsWithDeletionAtLoci,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final Collection<String> samples) {
|
||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
|
||||
// LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
|
||||
// downsampling to coverage by sample. SAMDataSource will have refrained from applying
|
||||
// any downsamplers to the read stream in this case, in the expectation that LIBS will
|
||||
// manage the downsampling. The reason for this is twofold: performance (don't have to
|
||||
// split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
|
||||
// of reads (eg., using half of a read, and throwing the rest away).
|
||||
this.performDownsampling = readInfo.getDownsamplingMethod() != null &&
|
||||
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readInfo.getDownsamplingMethod().toCoverage != null;
|
||||
|
||||
this.readStates = new ReadStateManager(samIterator);
|
||||
this.keepSubmittedReads = false; // TODO -- HOOK UP SYSTEM
|
||||
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, keepSubmittedReads);
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
|
|
@ -242,28 +103,19 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<AlignmentContext> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
//this.it.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
lazyLoadNextAlignmentContext();
|
||||
return (nextAlignmentContext != null);
|
||||
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
||||
return nextAlignmentContext != null;
|
||||
}
|
||||
|
||||
private GenomeLoc getLocation() {
|
||||
|
|
@ -275,6 +127,8 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// next() routine and associated collection operations
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public AlignmentContext next() {
|
||||
lazyLoadNextAlignmentContext();
|
||||
if (!hasNext())
|
||||
|
|
@ -299,7 +153,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
boolean hasBeenSampled = false;
|
||||
|
||||
for (final String sample : samples) {
|
||||
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
final Iterator<SAMRecordAlignmentState> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
|
||||
int size = 0; // number of elements in this sample's pileup
|
||||
|
|
@ -307,7 +161,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
||||
final SAMRecordAlignmentState state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
|
||||
|
|
@ -330,7 +184,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
if (op == CigarOperator.D) {
|
||||
// TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix
|
||||
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
||||
if (includeReadsWithDeletionAtLoci) { // only add deletions to the pileup if we are authorized to do so
|
||||
pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
|
||||
size++;
|
||||
nDeletions++;
|
||||
|
|
@ -367,33 +221,11 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
if (readStates.isEmpty())
|
||||
return false;
|
||||
else {
|
||||
SAMRecordState state = readStates.getFirst();
|
||||
SAMRecord ourRead = state.getRead();
|
||||
return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic place to put per-base filters appropriate to LocusIteratorByState
|
||||
*
|
||||
* @param rec
|
||||
* @param pos
|
||||
* @return
|
||||
*/
|
||||
private static boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
|
||||
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
||||
}
|
||||
|
||||
private void updateReadStates() {
|
||||
for (final String sample : samples) {
|
||||
Iterator<SAMRecordState> it = readStates.iterator(sample);
|
||||
Iterator<SAMRecordAlignmentState> it = readStates.iterator(sample);
|
||||
while (it.hasNext()) {
|
||||
SAMRecordState state = it.next();
|
||||
SAMRecordAlignmentState state = it.next();
|
||||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
|
|
@ -405,257 +237,42 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// utility functions
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
protected class ReadStateManager {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(performDownsampling);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented
|
||||
* for this iterator; if present, total read states will be decremented.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
public Iterator<SAMRecordState> iterator(final String sample) {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return totalReadStates == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager across all samples.
|
||||
*
|
||||
* @return Total number of reads over all samples.
|
||||
*/
|
||||
public int size() {
|
||||
return totalReadStates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager in the given sample.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
public int size(final String sample) {
|
||||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if (!reads.isEmpty())
|
||||
return reads.peek();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return totalReadStates > 0 || iterator.hasNext();
|
||||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
if (!iterator.hasNext())
|
||||
return;
|
||||
|
||||
if (readStates.size() == 0) {
|
||||
int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
} else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if (readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
|
||||
samplePartitioner.doneSubmittingReads();
|
||||
|
||||
for (final String sample : samples) {
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
||||
samplePartitioner.reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add reads with the given sample name to the given hanger entry.
|
||||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||
private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
|
||||
private int thisSampleReadStates = 0;
|
||||
private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
|
||||
performDownsampling ?
|
||||
new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
|
||||
null;
|
||||
|
||||
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
|
||||
thisSampleReadStates += states.size();
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( levelingDownsampler != null ) {
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public SAMRecordState peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordState> iterator() {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<SAMRecordState> currentPositionReadStates = null;
|
||||
private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
|
||||
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Generic place to put per-base filters appropriate to LocusIteratorByState
|
||||
*
|
||||
* @param rec
|
||||
* @param pos
|
||||
* @return
|
||||
*/
|
||||
private boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
|
||||
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
|
||||
* Create a LIBSDownsamplingInfo object from the requested info in ReadProperties
|
||||
*
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
* LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
|
||||
* downsampling to coverage by sample. SAMDataSource will have refrained from applying
|
||||
* any downsamplers to the read stream in this case, in the expectation that LIBS will
|
||||
* manage the downsampling. The reason for this is twofold: performance (don't have to
|
||||
* split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
|
||||
* of reads (eg., using half of a read, and throwing the rest away).
|
||||
*
|
||||
* @param readInfo GATK engine information about what should be done to the reads
|
||||
* @return a LIBS specific info holder about downsampling only
|
||||
*/
|
||||
private class SamplePartitioner {
|
||||
private Map<String, Downsampler<SAMRecord>> readsBySample;
|
||||
private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) {
|
||||
final boolean performDownsampling = readInfo.getDownsamplingMethod() != null &&
|
||||
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readInfo.getDownsamplingMethod().toCoverage != null;
|
||||
final int coverage = performDownsampling ? readInfo.getDownsamplingMethod().toCoverage : 0;
|
||||
|
||||
public SamplePartitioner( boolean downsampleReads ) {
|
||||
readsBySample = new HashMap<String, Downsampler<SAMRecord>>();
|
||||
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample,
|
||||
downsampleReads ? new ReservoirDownsampler<SAMRecord>(readInfo.getDownsamplingMethod().toCoverage) :
|
||||
new PassThroughDownsampler<SAMRecord>());
|
||||
}
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submit(read);
|
||||
}
|
||||
|
||||
public void doneSubmittingReads() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().signalEndOfInput();
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
|
||||
return readsBySample.get(sampleName).consumeFinalizedItems();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().clear();
|
||||
perSampleReads.getValue().reset();
|
||||
}
|
||||
}
|
||||
return new LIBSDownsamplingInfo(performDownsampling, coverage);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,343 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Manages and updates mapping from sample -> List of SAMRecordAlignmentState
|
||||
*
|
||||
* Optionally can keep track of all of the reads pulled off the iterator and
|
||||
* that appeared at any point in the list of SAMRecordAlignmentState for any reads.
|
||||
* This functionaly is only possible at this stage, as this object does the popping of
|
||||
* reads off the underlying source iterator, and presents only a pileup-like interface
|
||||
* of samples -> SAMRecordAlignmentStates. Reconstructing the unique set of reads
|
||||
* used across all pileups is extremely expensive from that data structure.
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/5/13
|
||||
* Time: 2:02 PM
|
||||
*/
|
||||
class ReadStateManager {
|
||||
private final List<String> samples;
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
|
||||
private LinkedList<SAMRecord> submittedReads;
|
||||
private final boolean keepSubmittedReads;
|
||||
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(final Iterator<SAMRecord> source,
|
||||
final List<String> samples,
|
||||
final LIBSDownsamplingInfo LIBSDownsamplingInfo,
|
||||
final boolean keepSubmittedReads) {
|
||||
this.samples = samples;
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
|
||||
this.keepSubmittedReads = keepSubmittedReads;
|
||||
this.submittedReads = new LinkedList<SAMRecord>();
|
||||
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager(LIBSDownsamplingInfo));
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(LIBSDownsamplingInfo, samples);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented
|
||||
* for this iterator; if present, total read states will be decremented.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
public Iterator<SAMRecordAlignmentState> iterator(final String sample) {
|
||||
return new Iterator<SAMRecordAlignmentState>() {
|
||||
private Iterator<SAMRecordAlignmentState> wrappedIterator = readStatesBySample.get(sample).iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return totalReadStates == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager across all samples.
|
||||
*
|
||||
* @return Total number of reads over all samples.
|
||||
*/
|
||||
public int size() {
|
||||
return totalReadStates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the total number of reads in the manager in the given sample.
|
||||
*
|
||||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
public int size(final String sample) {
|
||||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState getFirst() {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if (!reads.isEmpty())
|
||||
return reads.peek();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return totalReadStates > 0 || iterator.hasNext();
|
||||
}
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
if (isEmpty())
|
||||
return false;
|
||||
else {
|
||||
SAMRecordAlignmentState state = getFirst();
|
||||
SAMRecord ourRead = state.getRead();
|
||||
return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
|
||||
}
|
||||
}
|
||||
|
||||
public void collectPendingReads() {
|
||||
if (!iterator.hasNext())
|
||||
return;
|
||||
|
||||
// the next record in the stream, peeked as to not remove it from the stream
|
||||
if ( isEmpty() ) {
|
||||
final int firstContigIndex = iterator.peek().getReferenceIndex();
|
||||
final int firstAlignmentStart = iterator.peek().getAlignmentStart();
|
||||
while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
|
||||
submitRead(iterator.next());
|
||||
}
|
||||
} else {
|
||||
// Fast fail in the case that the read is past the current position.
|
||||
if (readIsPastCurrentPosition(iterator.peek()))
|
||||
return;
|
||||
|
||||
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
|
||||
submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
|
||||
samplePartitioner.doneSubmittingReads();
|
||||
|
||||
for (final String sample : samples) {
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
||||
samplePartitioner.reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a read to the sample partitioner, potentially adding it to all submitted reads, if appropriate
|
||||
* @param read a non-null read
|
||||
*/
|
||||
@Requires("read != null")
|
||||
protected void submitRead(final SAMRecord read) {
|
||||
if ( keepSubmittedReads )
|
||||
submittedReads.add(read);
|
||||
samplePartitioner.submitRead(read);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transfer current list of submitted reads, clearing old list
|
||||
*
|
||||
* Takes the maintained list of submitted reads, and transfers it to the caller of this
|
||||
* function. The old list of set to a new, cleanly allocated list so the caller officially
|
||||
* owns the list returned by this call. This is the only way to clear the tracking
|
||||
* of submitted reads, if enabled.
|
||||
*
|
||||
* How to use this function:
|
||||
*
|
||||
* while ( doing some work unit, such as creating pileup at some locus ):
|
||||
* interact with ReadStateManager in some way to make work unit
|
||||
* readsUsedInPileup = transferSubmittedReads)
|
||||
*
|
||||
* @throws UnsupportedOperationException if called when keepingSubmittedReads is false
|
||||
*
|
||||
* @return the current list of submitted reads
|
||||
*/
|
||||
@Ensures({
|
||||
"result != null",
|
||||
"result != submittedReads" // result and previous submitted reads are not == objects
|
||||
})
|
||||
public List<SAMRecord> transferSubmittedReads() {
|
||||
if ( ! keepSubmittedReads ) throw new UnsupportedOperationException("cannot transferSubmittedReads if you aren't keeping them");
|
||||
|
||||
final List<SAMRecord> prevSubmittedReads = submittedReads;
|
||||
this.submittedReads = new LinkedList<SAMRecord>();
|
||||
|
||||
return prevSubmittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain a pointer to the list of submitted reads.
|
||||
*
|
||||
* This is not a copy of the list; it is shared with this ReadStateManager. It should
|
||||
* not be modified. Updates to this ReadStateManager may change the contains of the
|
||||
* list entirely.
|
||||
*
|
||||
* For testing purposes only.
|
||||
*
|
||||
* Will always be empty if we are are not keepingSubmittedReads
|
||||
*
|
||||
* @return a non-null list of reads that have been submitted to this ReadStateManager
|
||||
*/
|
||||
@Ensures({"result != null","keepingSubmittedReads || result.isEmpty()"})
|
||||
protected List<SAMRecord> getSubmittedReads() {
|
||||
return submittedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add reads with the given sample name to the given hanger entry.
|
||||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordAlignmentState> newReadStates = new LinkedList<SAMRecordAlignmentState>();
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
SAMRecordAlignmentState state = new SAMRecordAlignmentState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
protected class PerSampleReadStateManager implements Iterable<SAMRecordAlignmentState> {
|
||||
private List<LinkedList<SAMRecordAlignmentState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordAlignmentState>>();
|
||||
private final Downsampler<LinkedList<SAMRecordAlignmentState>> levelingDownsampler;
|
||||
|
||||
private int thisSampleReadStates = 0;
|
||||
|
||||
public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new LevelingDownsampler<LinkedList<SAMRecordAlignmentState>, SAMRecordAlignmentState>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: null;
|
||||
}
|
||||
|
||||
public void addStatesAtNextAlignmentStart(Collection<SAMRecordAlignmentState> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(new LinkedList<SAMRecordAlignmentState>(states));
|
||||
thisSampleReadStates += states.size();
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( levelingDownsampler != null ) {
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordAlignmentState> iterator() {
|
||||
return new Iterator<SAMRecordAlignmentState>() {
|
||||
private Iterator<LinkedList<SAMRecordAlignmentState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<SAMRecordAlignmentState> currentPositionReadStates = null;
|
||||
private Iterator<SAMRecordAlignmentState> currentPositionReadStatesIterator = null;
|
||||
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
public SAMRecordAlignmentState next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,205 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
/**
|
||||
* Steps a single read along its alignment to the genome
|
||||
*
|
||||
* The logical model for generating extended events is as follows: the "record state"
|
||||
* implements the traversal along the reference; thus stepForwardOnGenome() returns
|
||||
* on every and only on actual reference bases. This can be a (mis)match or a deletion
|
||||
* (in the latter case, we still return on every individual reference base the deletion spans).
|
||||
* In the extended events mode, the record state also remembers if there was an insertion, or
|
||||
* if the deletion just started *right before* the current reference base the record state is
|
||||
* pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome()
|
||||
* will clear that memory (as we remember only extended events immediately preceding
|
||||
* the current reference base).
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/5/13
|
||||
* Time: 1:08 PM
|
||||
*/
|
||||
class SAMRecordAlignmentState {
|
||||
// TODO -- one idea to clean up this functionality:
|
||||
// TODO --
|
||||
// TODO -- split functionality here into an alignment state machine and an
|
||||
// TODO -- alignment state. The alignment state simply carries with it the
|
||||
// TODO -- state of the alignment (the current cigar op, the genome offset,
|
||||
// TODO -- the read offset, etc. The AlignmentStateMachine produces these
|
||||
// TODO -- states, and has operations such stepForwardOnGenome, getLastState(),
|
||||
// TODO -- getCurrentState(), getNextState();
|
||||
|
||||
/**
|
||||
* Our read
|
||||
*/
|
||||
private final SAMRecord read;
|
||||
private final Cigar cigar;
|
||||
private final int nCigarElements;
|
||||
|
||||
/**
|
||||
* how far are we offset from the start of the read bases?
|
||||
*/
|
||||
int readOffset = -1;
|
||||
|
||||
/**
|
||||
* how far are we offset from the alignment start on the genome?
|
||||
*/
|
||||
int genomeOffset = -1;
|
||||
|
||||
int cigarOffset = -1;
|
||||
CigarElement curElement = null;
|
||||
|
||||
/**
|
||||
* how far are we into a single cigarElement?
|
||||
*/
|
||||
int cigarElementCounter = -1;
|
||||
|
||||
@Requires("read != null")
|
||||
// TODO -- should enforce contracts like the read is aligned, etc
|
||||
public SAMRecordAlignmentState(final SAMRecord read) {
|
||||
this.read = read;
|
||||
this.cigar = read.getCigar();
|
||||
this.nCigarElements = cigar.numCigarElements();
|
||||
}
|
||||
|
||||
public SAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is our current offset in the read's bases that aligns us with the reference genome?
|
||||
*
|
||||
* @return the current read offset position
|
||||
*/
|
||||
public int getReadOffset() {
|
||||
return readOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
|
||||
*
|
||||
* @return the current offset
|
||||
*/
|
||||
public int getGenomeOffset() {
|
||||
return genomeOffset;
|
||||
}
|
||||
|
||||
public int getGenomePosition() {
|
||||
return read.getAlignmentStart() + getGenomeOffset();
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
|
||||
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||
}
|
||||
|
||||
public CigarOperator getCurrentCigarOperator() {
|
||||
return curElement.getOperator();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||
}
|
||||
|
||||
public CigarElement peekForwardOnGenome() {
|
||||
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
|
||||
}
|
||||
|
||||
public CigarElement peekBackwardOnGenome() {
|
||||
return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
|
||||
}
|
||||
|
||||
public CigarOperator stepForwardOnGenome() {
|
||||
// we enter this method with readOffset = index of the last processed base on the read
|
||||
// (-1 if we did not process a single base yet); this can be last matching base,
|
||||
// or last base of an insertion
|
||||
if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
|
||||
cigarOffset++;
|
||||
if (cigarOffset < nCigarElements) {
|
||||
curElement = cigar.getCigarElement(cigarOffset);
|
||||
cigarElementCounter = 0;
|
||||
// next line: guards against cigar elements of length 0; when new cigar element is retrieved,
|
||||
// we reenter in order to re-check cigarElementCounter against curElement's length
|
||||
return stepForwardOnGenome();
|
||||
} else {
|
||||
if (curElement != null && curElement.getOperator() == CigarOperator.D)
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
|
||||
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
||||
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
||||
// current offset of this read is the next ref base after the end of the indel. This position will
|
||||
// model a point on the reference somewhere after the end of the read.
|
||||
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
boolean done = false;
|
||||
switch (curElement.getOperator()) {
|
||||
case H: // ignore hard clips
|
||||
case P: // ignore pads
|
||||
cigarElementCounter = curElement.getLength();
|
||||
break;
|
||||
case I: // insertion w.r.t. the reference
|
||||
case S: // soft clip
|
||||
cigarElementCounter = curElement.getLength();
|
||||
readOffset += curElement.getLength();
|
||||
break;
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
// should be the same as N case
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
readOffset++;
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
return done ? curElement.getOperator() : stepForwardOnGenome();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.PassThroughDownsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
|
||||
*
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
class SamplePartitioner {
|
||||
private Map<String, Downsampler<SAMRecord>> readsBySample;
|
||||
|
||||
public SamplePartitioner(final LIBSDownsamplingInfo LIBSDownsamplingInfo, final List<String> samples) {
|
||||
readsBySample = new HashMap<String, Downsampler<SAMRecord>>(samples.size());
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample, createDownsampler(LIBSDownsamplingInfo));
|
||||
}
|
||||
}
|
||||
|
||||
private Downsampler<SAMRecord> createDownsampler(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
return LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new ReservoirDownsampler<SAMRecord>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: new PassThroughDownsampler<SAMRecord>();
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submit(read);
|
||||
}
|
||||
|
||||
public void doneSubmittingReads() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().signalEndOfInput();
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
|
||||
return readsBySample.get(sampleName).consumeFinalizedItems();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().clear();
|
||||
perSampleReads.getValue().reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
package org.broadinstitute.sting.utils.locusiterator.legacy;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.Cigar;
|
||||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.LegacyReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIterator;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -326,6 +326,34 @@ public class ArtificialSAMUtils {
|
|||
return stack;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a read stream based on the parameters. The cigar string for each
|
||||
* read will be *M, where * is the length of the read.
|
||||
*
|
||||
* Useful for testing things like LocusIteratorBystate
|
||||
*
|
||||
* @return a collection of stackSize reads all sharing the above properties
|
||||
*/
|
||||
public static List<SAMRecord> createReadStream( final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
final SAMFileHeader header,
|
||||
final int alignmentStart,
|
||||
final int length ) {
|
||||
final String name = "readName";
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(nReadsPerLocus*nLoci);
|
||||
for ( int locus = 0; locus < nLoci; locus++ ) {
|
||||
for ( int readI = 0; readI < nReadsPerLocus; readI++ ) {
|
||||
for ( final SAMReadGroupRecord rg : header.getReadGroups() ) {
|
||||
final GATKSAMRecord read = createArtificialRead(header, name, 0, alignmentStart, length);
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord(rg));
|
||||
reads.add(read);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* create an iterator containing the specified read piles
|
||||
*
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.ReadProperties;
|
|||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: 1/5/13
|
||||
* Time: 8:42 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public final class LIBS_position {
|
||||
|
||||
SAMRecord read;
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
// if ( !sawMop )
|
||||
// break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,531 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* testing of the LEGACY version of LocusIteratorByState
|
||||
*/
|
||||
public class LegacyLocusIteratorByStateUnitTest extends BaseTest {
|
||||
private static SAMFileHeader header;
|
||||
private LegacyLocusIteratorByState li;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
private LegacyLocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
|
||||
return new LegacyLocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXandEQOperators() {
|
||||
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
||||
|
||||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||
r1.setReadBases(bases1);
|
||||
r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r1.setCigarString("10M");
|
||||
|
||||
SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||
r2.setReadBases(bases2);
|
||||
r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r2.setCigarString("3=1X5=1X");
|
||||
|
||||
SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||
r3.setReadBases(bases2);
|
||||
r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r3.setCigarString("3=1X5M1X");
|
||||
|
||||
SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||
r4.setReadBases(bases2);
|
||||
r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r4.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
while (li.hasNext()) {
|
||||
AlignmentContext context = li.next();
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
Assert.assertEquals(pileup.depthOfCoverage(), 4);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelsInRegularPileup() {
|
||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
|
||||
|
||||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
|
||||
before.setReadBases(bases);
|
||||
before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
before.setCigarString("10M");
|
||||
|
||||
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
|
||||
during.setReadBases(indelBases);
|
||||
during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
during.setCigarString("4M2I6M");
|
||||
|
||||
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
|
||||
after.setReadBases(bases);
|
||||
after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
after.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(before, during, after);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
||||
boolean foundIndel = false;
|
||||
while (li.hasNext()) {
|
||||
AlignmentContext context = li.next();
|
||||
ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
|
||||
for (PileupElement p : pileup) {
|
||||
if (p.isBeforeInsertion()) {
|
||||
foundIndel = true;
|
||||
Assert.assertEquals(p.getEventLength(), 2, "Wrong event length");
|
||||
Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Assert.assertTrue(foundIndel,"Indel in pileup not found");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWholeIndelReadInIsolation() {
|
||||
final int firstLocus = 44367789;
|
||||
|
||||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
|
||||
indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
|
||||
indelOnlyRead.setCigarString("76I");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, readAttributes);
|
||||
|
||||
// Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
|
||||
// and considers it to be an indel-containing read.
|
||||
Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
|
||||
ReadBackedPileup basePileup = alignmentContext.getBasePileup();
|
||||
Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
|
||||
Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do
|
||||
* not negatively influence the ordering of the pileup.
|
||||
*/
|
||||
@Test
|
||||
public void testWholeIndelRead() {
|
||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
|
||||
SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
|
||||
leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
leadingRead.setCigarString("1M75I");
|
||||
|
||||
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
|
||||
indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
|
||||
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
indelOnlyRead.setCigarString("76I");
|
||||
|
||||
SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
|
||||
fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
fullMatchAfterIndel.setCigarString("75I1M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
int currentLocus = firstLocus;
|
||||
int numAlignmentContextsFound = 0;
|
||||
|
||||
while(li.hasNext()) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
|
||||
|
||||
if(currentLocus == firstLocus) {
|
||||
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
|
||||
Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
|
||||
}
|
||||
else if(currentLocus == secondLocus) {
|
||||
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
|
||||
Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
|
||||
Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
|
||||
}
|
||||
|
||||
currentLocus++;
|
||||
numAlignmentContextsFound++;
|
||||
}
|
||||
|
||||
Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
|
||||
*/
|
||||
@Test
|
||||
public void testWholeIndelReadRepresentedTest() {
|
||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
|
||||
read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
|
||||
read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
|
||||
read1.setCigarString("1I");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(read1);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
||||
while(li.hasNext()) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
PileupElement pe = p.iterator().next();
|
||||
Assert.assertTrue(pe.isBeforeInsertion());
|
||||
Assert.assertFalse(pe.isAfterInsertion());
|
||||
Assert.assertEquals(pe.getEventBases(), "A");
|
||||
}
|
||||
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
|
||||
read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
|
||||
read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
|
||||
read2.setCigarString("10I");
|
||||
|
||||
reads = Arrays.asList(read2);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
||||
while(li.hasNext()) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
PileupElement pe = p.iterator().next();
|
||||
Assert.assertTrue(pe.isBeforeInsertion());
|
||||
Assert.assertFalse(pe.isAfterInsertion());
|
||||
Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////
|
||||
|
||||
private static class LIBSTest {
|
||||
|
||||
|
||||
final String cigar;
|
||||
final int readLength;
|
||||
|
||||
private LIBSTest(final String cigar, final int readLength) {
|
||||
this.cigar = cigar;
|
||||
this.readLength = readLength;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "LIBSTest")
|
||||
public Object[][] createLIBSTestData() {
|
||||
|
||||
//TODO -- when LIBS is fixed this should be replaced to provide all possible permutations of CIGAR strings
|
||||
|
||||
return new Object[][]{
|
||||
{new LIBSTest("1I", 1)},
|
||||
{new LIBSTest("10I", 10)},
|
||||
{new LIBSTest("2M2I2M", 6)},
|
||||
{new LIBSTest("2M2I", 4)},
|
||||
//TODO -- uncomment these when LIBS is fixed
|
||||
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
|
||||
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
|
||||
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
|
||||
//{new LIBSTest("1M2D2M", 3)},
|
||||
{new LIBSTest("1S1M", 2)},
|
||||
{new LIBSTest("1M1S", 2)},
|
||||
{new LIBSTest("1S1M1I", 3)}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "LIBSTest")
|
||||
public void testLIBS(LIBSTest params) {
|
||||
final int locus = 44367788;
|
||||
|
||||
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
|
||||
read.setCigarString(params.cigar);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
|
||||
final LIBS_position tester = new LIBS_position(read);
|
||||
|
||||
while ( li.hasNext() ) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
PileupElement pe = p.iterator().next();
|
||||
|
||||
tester.stepForwardOnGenome();
|
||||
|
||||
Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase);
|
||||
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
|
||||
Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase);
|
||||
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
|
||||
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
|
||||
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
|
||||
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
|
||||
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset());
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// End comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////////
|
||||
|
||||
private static ReadProperties createTestReadProperties() {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
new SAMFileHeader(),
|
||||
SAMFileHeader.SortOrder.coordinate,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte) -1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final class LIBS_position {
|
||||
|
||||
SAMRecord read;
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,252 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateBaseTest extends BaseTest {
|
||||
protected static SAMFileHeader header;
|
||||
protected GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
protected static List<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
|
||||
protected LocusIteratorByState makeLTBS(List<SAMRecord> reads,
|
||||
ReadProperties readAttributes) {
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
readAttributes,
|
||||
genomeLocParser,
|
||||
sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
protected static ReadProperties createTestReadProperties() {
|
||||
return createTestReadProperties(null);
|
||||
}
|
||||
|
||||
protected static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
new SAMFileHeader(),
|
||||
SAMFileHeader.SortOrder.coordinate,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
downsamplingMethod,
|
||||
new ValidationExclusion(),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte) -1
|
||||
);
|
||||
}
|
||||
|
||||
protected static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
protected static class LIBSTest {
|
||||
public static final int locus = 44367788;
|
||||
final String cigar;
|
||||
final int readLength;
|
||||
final private List<CigarElement> elements;
|
||||
|
||||
public LIBSTest(final String cigar, final int readLength) {
|
||||
this(null, cigar, readLength);
|
||||
}
|
||||
|
||||
public LIBSTest(final List<CigarElement> elements, final String cigar, final int readLength) {
|
||||
this.elements = elements;
|
||||
this.cigar = cigar;
|
||||
this.readLength = readLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "LIBSTest{" +
|
||||
"cigar='" + cigar + '\'' +
|
||||
", readLength=" + readLength +
|
||||
'}';
|
||||
}
|
||||
|
||||
public List<CigarElement> getElements() {
|
||||
return elements;
|
||||
}
|
||||
|
||||
public GATKSAMRecord makeRead() {
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
final byte[] quals = new byte[readLength];
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
read.setBaseQualities(quals);
|
||||
read.setCigarString(cigar);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isIndel(final CigarElement ce) {
|
||||
return ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I;
|
||||
}
|
||||
|
||||
private boolean startsWithDeletion(final List<CigarElement> elements) {
|
||||
for ( final CigarElement element : elements ) {
|
||||
switch ( element.getOperator() ) {
|
||||
case M:
|
||||
case I:
|
||||
case EQ:
|
||||
case X:
|
||||
return false;
|
||||
case D:
|
||||
return true;
|
||||
default:
|
||||
// keep looking
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private LIBSTest makePermutationTest(final List<CigarElement> elements) {
|
||||
CigarElement last = null;
|
||||
boolean hasMatch = false;
|
||||
|
||||
// starts with D => bad
|
||||
if ( startsWithDeletion(elements) )
|
||||
return null;
|
||||
|
||||
// ends with D => bad
|
||||
if ( elements.get(elements.size()-1).getOperator() == CigarOperator.D )
|
||||
return null;
|
||||
|
||||
// make sure it's valid
|
||||
String cigar = "";
|
||||
int len = 0;
|
||||
for ( final CigarElement ce : elements ) {
|
||||
if ( ce.getOperator() == CigarOperator.N )
|
||||
return null; // TODO -- don't support N
|
||||
|
||||
// abort on a bad cigar
|
||||
if ( last != null ) {
|
||||
if ( ce.getOperator() == last.getOperator() )
|
||||
return null;
|
||||
if ( isIndel(ce) && isIndel(last) )
|
||||
return null;
|
||||
}
|
||||
|
||||
cigar += ce.getLength() + ce.getOperator().toString();
|
||||
len += ce.getLength();
|
||||
last = ce;
|
||||
hasMatch = hasMatch || ce.getOperator() == CigarOperator.M;
|
||||
}
|
||||
|
||||
if ( ! hasMatch )
|
||||
return null;
|
||||
|
||||
return new LIBSTest(elements, cigar, len);
|
||||
}
|
||||
|
||||
@DataProvider(name = "LIBSTest")
|
||||
public Object[][] createLIBSTests(final List<Integer> cigarLengths, final List<Integer> combinations) {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final List<CigarOperator> allOps = Arrays.asList(CigarOperator.values());
|
||||
|
||||
final List<CigarElement> singleCigars = new LinkedList<CigarElement>();
|
||||
for ( final int len : cigarLengths )
|
||||
for ( final CigarOperator op : allOps )
|
||||
singleCigars.add(new CigarElement(len, op));
|
||||
|
||||
for ( final int complexity : combinations ) {
|
||||
for ( final List<CigarElement> elements : Utils.makePermutations(singleCigars, complexity, true) ) {
|
||||
final LIBSTest test = makePermutationTest(elements);
|
||||
if ( test != null ) tests.add(new Object[]{test});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -26,25 +26,16 @@
|
|||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -53,20 +44,13 @@ import java.util.*;
|
|||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||
private static SAMFileHeader header;
|
||||
private LocusIteratorByState li;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
// TODO -- REMOVE ME WHEN LIBS IS FIXED
|
||||
// TODO -- CURRENT CODE DOESN'T CORRECTLY COMPUTE THINGS LIKE BEFORE DELETION, AFTER INSERTION, ETC
|
||||
private final static boolean ALLOW_BROKEN_LIBS_STATE = true;
|
||||
|
||||
private LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
protected LocusIteratorByState li;
|
||||
|
||||
@Test
|
||||
public void testXandEQOperators() {
|
||||
|
|
@ -286,53 +270,46 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
// comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////
|
||||
|
||||
private static class LIBSTest {
|
||||
|
||||
|
||||
final String cigar;
|
||||
final int readLength;
|
||||
|
||||
private LIBSTest(final String cigar, final int readLength) {
|
||||
this.cigar = cigar;
|
||||
this.readLength = readLength;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "LIBSTest")
|
||||
public Object[][] createLIBSTestData() {
|
||||
public Object[][] makeLIBSTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
//TODO -- when LIBS is fixed this should be replaced to provide all possible permutations of CIGAR strings
|
||||
tests.add(new Object[]{new LIBSTest("1I", 1)});
|
||||
tests.add(new Object[]{new LIBSTest("10I", 10)});
|
||||
tests.add(new Object[]{new LIBSTest("2M2I2M", 6)});
|
||||
tests.add(new Object[]{new LIBSTest("2M2I", 4)});
|
||||
//TODO -- uncomment these when LIBS is fixed
|
||||
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
|
||||
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
|
||||
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
|
||||
//{new LIBSTest("1M2D2M", 3)},
|
||||
tests.add(new Object[]{new LIBSTest("1S1M", 2)});
|
||||
tests.add(new Object[]{new LIBSTest("1M1S", 2)});
|
||||
tests.add(new Object[]{new LIBSTest("1S1M1I", 3)});
|
||||
|
||||
return new Object[][]{
|
||||
{new LIBSTest("1I", 1)},
|
||||
{new LIBSTest("10I", 10)},
|
||||
{new LIBSTest("2M2I2M", 6)},
|
||||
{new LIBSTest("2M2I", 4)},
|
||||
//TODO -- uncomment these when LIBS is fixed
|
||||
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
|
||||
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
|
||||
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
|
||||
//{new LIBSTest("1M2D2M", 3)},
|
||||
{new LIBSTest("1S1M", 2)},
|
||||
{new LIBSTest("1M1S", 2)},
|
||||
{new LIBSTest("1S1M1I", 3)}
|
||||
};
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
||||
// TODO -- enable combinatorial tests here when LIBS is fixed
|
||||
// return createLIBSTests(
|
||||
// Arrays.asList(1, 10),
|
||||
// Arrays.asList(1, 2, 3));
|
||||
}
|
||||
|
||||
@Test(dataProvider = "LIBSTest")
|
||||
public void testLIBS(LIBSTest params) {
|
||||
final int locus = 44367788;
|
||||
|
||||
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
|
||||
read.setCigarString(params.cigar);
|
||||
if ( params.getElements() == null || params.getElements().get(0).getOperator() == CigarOperator.I )
|
||||
// TODO -- ENABLE ME WHEN LIBS IS FIXED
|
||||
return;
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
|
||||
final GATKSAMRecord read = params.makeRead();
|
||||
li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
|
||||
final LIBS_position tester = new LIBS_position(read);
|
||||
|
||||
int bpVisited = 0;
|
||||
while ( li.hasNext() ) {
|
||||
bpVisited++;
|
||||
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
|
|
@ -340,336 +317,68 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
|
||||
tester.stepForwardOnGenome();
|
||||
|
||||
Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase);
|
||||
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
|
||||
Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase);
|
||||
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
|
||||
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
|
||||
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
|
||||
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
|
||||
if ( ! ALLOW_BROKEN_LIBS_STATE ) {
|
||||
Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase);
|
||||
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
|
||||
Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase);
|
||||
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
|
||||
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
|
||||
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
|
||||
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
|
||||
}
|
||||
|
||||
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset());
|
||||
}
|
||||
|
||||
// min is one because always visit something, even for 10I reads
|
||||
final int expectedBpToVisit = Math.max(read.getAlignmentEnd() - read.getAlignmentStart() + 1, 1);
|
||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// End comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////////
|
||||
// ------------------------------------------------------------
|
||||
//
|
||||
// Tests for keeping reads
|
||||
//
|
||||
// ------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "LIBSKeepSubmittedReads")
|
||||
public Object[][] makeLIBSKeepSubmittedReads() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
///////////////////////////////////////
|
||||
// Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
private class PerSampleReadStateManagerTest extends TestDataProvider {
|
||||
private List<Integer> readCountsPerAlignmentStart;
|
||||
private List<SAMRecord> reads;
|
||||
private List<ArrayList<LocusIteratorByState.SAMRecordState>> recordStatesByAlignmentStart;
|
||||
private int removalInterval;
|
||||
|
||||
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
|
||||
super(PerSampleReadStateManagerTest.class);
|
||||
|
||||
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
|
||||
this.removalInterval = removalInterval;
|
||||
|
||||
reads = new ArrayList<SAMRecord>();
|
||||
recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByState.SAMRecordState>>();
|
||||
|
||||
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
|
||||
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
LocusIteratorByState libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
|
||||
LocusIteratorByState.ReadStateManager readStateManager =
|
||||
libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
|
||||
LocusIteratorByState.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
|
||||
readStateManager.new PerSampleReadStateManager();
|
||||
|
||||
makeReads();
|
||||
|
||||
for ( ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
|
||||
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
|
||||
}
|
||||
|
||||
// read state manager should have the right number of reads
|
||||
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
|
||||
|
||||
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
|
||||
Iterator<LocusIteratorByState.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int recordStateCount = 0;
|
||||
int numReadStatesRemoved = 0;
|
||||
|
||||
// Do a first-pass validation of the record state iteration by making sure we get back everything we
|
||||
// put in, in the same order, doing any requested removals of read states along the way
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
|
||||
recordStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
|
||||
// If requested, remove a read state every removalInterval states
|
||||
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
|
||||
recordStateIterator.remove();
|
||||
numReadStatesRemoved++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertFalse(originalReadsIterator.hasNext());
|
||||
|
||||
// If we removed any read states, do a second pass through the read states to make sure the right
|
||||
// states were removed
|
||||
if ( numReadStatesRemoved > 0 ) {
|
||||
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
|
||||
|
||||
originalReadsIterator = reads.iterator();
|
||||
recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int readCount = 0;
|
||||
int readStateCount = 0;
|
||||
|
||||
// Match record states with the reads that should remain after removal
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
|
||||
readStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
readCount++;
|
||||
|
||||
if ( readCount % removalInterval == 0 ) {
|
||||
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
|
||||
readCount++;
|
||||
for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) {
|
||||
for ( final int nLoci : Arrays.asList(1, 10, 100, 1000) ) {
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 100) ) {
|
||||
for ( final boolean keepReads : Arrays.asList(true, false) ) {
|
||||
tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads});
|
||||
}
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
}
|
||||
|
||||
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
|
||||
}
|
||||
|
||||
// Allow memory used by this test to be reclaimed
|
||||
readCountsPerAlignmentStart = null;
|
||||
reads = null;
|
||||
recordStatesByAlignmentStart = null;
|
||||
}
|
||||
|
||||
private void makeReads() {
|
||||
int alignmentStart = 1;
|
||||
|
||||
for ( int readsThisStack : readCountsPerAlignmentStart ) {
|
||||
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByState.SAMRecordState>();
|
||||
|
||||
for ( SAMRecord read : stackReads ) {
|
||||
stackRecordStates.add(new LocusIteratorByState.SAMRecordState(read));
|
||||
}
|
||||
|
||||
reads.addAll(stackReads);
|
||||
recordStatesByAlignmentStart.add(stackRecordStates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
|
||||
public Object[][] createPerSampleReadStateManagerTests() {
|
||||
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
|
||||
Arrays.asList(2),
|
||||
Arrays.asList(10),
|
||||
Arrays.asList(1, 1),
|
||||
Arrays.asList(2, 2),
|
||||
Arrays.asList(10, 10),
|
||||
Arrays.asList(1, 10),
|
||||
Arrays.asList(10, 1),
|
||||
Arrays.asList(1, 1, 1),
|
||||
Arrays.asList(2, 2, 2),
|
||||
Arrays.asList(10, 10, 10),
|
||||
Arrays.asList(1, 1, 1, 1, 1, 1),
|
||||
Arrays.asList(10, 10, 10, 10, 10, 10),
|
||||
Arrays.asList(1, 2, 10, 1, 2, 10)
|
||||
) ) {
|
||||
|
||||
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
|
||||
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
|
||||
}
|
||||
}
|
||||
|
||||
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
|
||||
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
@Test(enabled = false, dataProvider = "LIBSKeepSubmittedReads")
|
||||
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus, final int nLoci, final int nSamples, final boolean keepReads) {
|
||||
final int readLength = 10;
|
||||
|
||||
test.run();
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// End Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Helper methods / classes //
|
||||
///////////////////////////////////////
|
||||
|
||||
private static ReadProperties createTestReadProperties() {
|
||||
return createTestReadProperties(null);
|
||||
}
|
||||
|
||||
private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
new SAMFileHeader(),
|
||||
SAMFileHeader.SortOrder.coordinate,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
downsamplingMethod,
|
||||
new ValidationExclusion(),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte) -1
|
||||
);
|
||||
}
|
||||
|
||||
private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg" + i);
|
||||
rg.setSample("sample" + i);
|
||||
rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
|
||||
header.addReadGroup(rg);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
final List<SAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
int bpVisited = 0;
|
||||
while ( li.hasNext() ) {
|
||||
bpVisited++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
private static final class LIBS_position {
|
||||
|
||||
SAMRecord read;
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
final int expectedBpToVisit = nLoci + readLength;
|
||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,214 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
||||
///////////////////////////////////////
|
||||
// Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
private class PerSampleReadStateManagerTest extends TestDataProvider {
|
||||
private List<Integer> readCountsPerAlignmentStart;
|
||||
private List<SAMRecord> reads;
|
||||
private List<ArrayList<SAMRecordAlignmentState>> recordStatesByAlignmentStart;
|
||||
private int removalInterval;
|
||||
|
||||
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
|
||||
super(PerSampleReadStateManagerTest.class);
|
||||
|
||||
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
|
||||
this.removalInterval = removalInterval;
|
||||
|
||||
reads = new ArrayList<SAMRecord>();
|
||||
recordStatesByAlignmentStart = new ArrayList<ArrayList<SAMRecordAlignmentState>>();
|
||||
|
||||
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
|
||||
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
final List<String> samples = sampleListForSAMWithoutReadGroups();
|
||||
final Iterator<SAMRecord> iterator = new LinkedList<SAMRecord>().iterator();
|
||||
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
|
||||
// ReadStateManager readStateManager =
|
||||
// libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
|
||||
// ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
|
||||
// readStateManager.new PerSampleReadStateManager();
|
||||
|
||||
makeReads();
|
||||
|
||||
for ( ArrayList<SAMRecordAlignmentState> stackRecordStates : recordStatesByAlignmentStart ) {
|
||||
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
|
||||
}
|
||||
|
||||
// read state manager should have the right number of reads
|
||||
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
|
||||
|
||||
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
|
||||
Iterator<SAMRecordAlignmentState> recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int recordStateCount = 0;
|
||||
int numReadStatesRemoved = 0;
|
||||
|
||||
// Do a first-pass validation of the record state iteration by making sure we get back everything we
|
||||
// put in, in the same order, doing any requested removals of read states along the way
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
SAMRecordAlignmentState readState = recordStateIterator.next();
|
||||
recordStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
|
||||
// If requested, remove a read state every removalInterval states
|
||||
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
|
||||
recordStateIterator.remove();
|
||||
numReadStatesRemoved++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertFalse(originalReadsIterator.hasNext());
|
||||
|
||||
// If we removed any read states, do a second pass through the read states to make sure the right
|
||||
// states were removed
|
||||
if ( numReadStatesRemoved > 0 ) {
|
||||
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
|
||||
|
||||
originalReadsIterator = reads.iterator();
|
||||
recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int readCount = 0;
|
||||
int readStateCount = 0;
|
||||
|
||||
// Match record states with the reads that should remain after removal
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
SAMRecordAlignmentState readState = recordStateIterator.next();
|
||||
readStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
readCount++;
|
||||
|
||||
if ( readCount % removalInterval == 0 ) {
|
||||
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
|
||||
readCount++;
|
||||
}
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
}
|
||||
|
||||
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
|
||||
}
|
||||
|
||||
// Allow memory used by this test to be reclaimed
|
||||
readCountsPerAlignmentStart = null;
|
||||
reads = null;
|
||||
recordStatesByAlignmentStart = null;
|
||||
}
|
||||
|
||||
private void makeReads() {
|
||||
int alignmentStart = 1;
|
||||
|
||||
for ( int readsThisStack : readCountsPerAlignmentStart ) {
|
||||
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<SAMRecordAlignmentState> stackRecordStates = new ArrayList<SAMRecordAlignmentState>();
|
||||
|
||||
for ( SAMRecord read : stackReads ) {
|
||||
stackRecordStates.add(new SAMRecordAlignmentState(read));
|
||||
}
|
||||
|
||||
reads.addAll(stackReads);
|
||||
recordStatesByAlignmentStart.add(stackRecordStates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
|
||||
public Object[][] createPerSampleReadStateManagerTests() {
|
||||
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
|
||||
Arrays.asList(2),
|
||||
Arrays.asList(10),
|
||||
Arrays.asList(1, 1),
|
||||
Arrays.asList(2, 2),
|
||||
Arrays.asList(10, 10),
|
||||
Arrays.asList(1, 10),
|
||||
Arrays.asList(10, 1),
|
||||
Arrays.asList(1, 1, 1),
|
||||
Arrays.asList(2, 2, 2),
|
||||
Arrays.asList(10, 10, 10),
|
||||
Arrays.asList(1, 1, 1, 1, 1, 1),
|
||||
Arrays.asList(10, 10, 10, 10, 10, 10),
|
||||
Arrays.asList(1, 2, 10, 1, 2, 10)
|
||||
) ) {
|
||||
|
||||
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
|
||||
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
|
||||
}
|
||||
}
|
||||
|
||||
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
|
||||
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
test.run();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class SAMRecordAlignmentStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||
@DataProvider(name = "AlignmentStateTest")
|
||||
public Object[][] makeAlignmentStateTest() {
|
||||
// return new Object[][]{{new LIBSTest("1I", 1)}};
|
||||
return createLIBSTests(
|
||||
Arrays.asList(1, 2),
|
||||
Arrays.asList(1, 2, 3, 4));
|
||||
}
|
||||
|
||||
@Test(dataProvider = "AlignmentStateTest")
|
||||
public void testAlignmentStateTest(LIBSTest params) {
|
||||
final GATKSAMRecord read = params.makeRead();
|
||||
final SAMRecordAlignmentState state = new SAMRecordAlignmentState(read);
|
||||
final LIBS_position tester = new LIBS_position(read);
|
||||
|
||||
Assert.assertSame(state.getRead(), read);
|
||||
Assert.assertNotNull(state.toString());
|
||||
|
||||
int bpVisited = 0;
|
||||
int lastOffset = -1;
|
||||
while ( state.stepForwardOnGenome() != null ) {
|
||||
bpVisited++;
|
||||
tester.stepForwardOnGenome();
|
||||
Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset());
|
||||
Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
|
||||
|
||||
// TODO -- state.peekBackwardOnGenome();
|
||||
// TODO -- state.peekForwardOnGenome();
|
||||
// TODO -- state.getCurrentCigarOperator()
|
||||
// TODO -- state.getGenomeOffset();
|
||||
// TODO -- state.getGenomePosition();
|
||||
// TODO -- Assert.assertEquals(state.getLocation(genomeLocParser), EXPECTATION);
|
||||
|
||||
lastOffset = state.getReadOffset();
|
||||
}
|
||||
|
||||
// min is one because always visit something, even for 10I reads
|
||||
final int expectedBpToVisit = read.getAlignmentEnd() - read.getAlignmentStart() + 1;
|
||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,160 @@
|
|||
package org.broadinstitute.sting.utils.locusiterator.legacy;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final class LIBS_position {
|
||||
|
||||
SAMRecord read;
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue