Contracts and documentation for AlignmentStateMachine and LocusIteratorByState
-- Add more unit tests for both as well
This commit is contained in:
parent
cc1d259cac
commit
2f2a592c8e
|
|
@ -25,6 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.locusiterator;
|
package org.broadinstitute.sting.utils.locusiterator;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Invariant;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
|
@ -40,16 +43,18 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
* implements the traversal along the reference; thus stepForwardOnGenome() returns
|
* implements the traversal along the reference; thus stepForwardOnGenome() returns
|
||||||
* on every and only on actual reference bases. This can be a (mis)match or a deletion
|
* on every and only on actual reference bases. This can be a (mis)match or a deletion
|
||||||
* (in the latter case, we still return on every individual reference base the deletion spans).
|
* (in the latter case, we still return on every individual reference base the deletion spans).
|
||||||
* In the extended events mode, the record state also remembers if there was an insertion, or
|
|
||||||
* if the deletion just started *right before* the current reference base the record state is
|
|
||||||
* pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome()
|
|
||||||
* will clear that memory (as we remember only extended events immediately preceding
|
|
||||||
* the current reference base).
|
|
||||||
*
|
*
|
||||||
* User: depristo
|
* User: depristo
|
||||||
* Date: 1/5/13
|
* Date: 1/5/13
|
||||||
* Time: 1:08 PM
|
* Time: 1:08 PM
|
||||||
*/
|
*/
|
||||||
|
@Invariant({
|
||||||
|
"nCigarElements >= 0",
|
||||||
|
"cigar != null",
|
||||||
|
"read != null",
|
||||||
|
"currentCigarElementOffset >= -1",
|
||||||
|
"currentCigarElementOffset <= nCigarElements"
|
||||||
|
})
|
||||||
class AlignmentStateMachine {
|
class AlignmentStateMachine {
|
||||||
/**
|
/**
|
||||||
* Our read
|
* Our read
|
||||||
|
|
@ -79,6 +84,7 @@ class AlignmentStateMachine {
|
||||||
*/
|
*/
|
||||||
private int offsetIntoCurrentCigarElement;
|
private int offsetIntoCurrentCigarElement;
|
||||||
|
|
||||||
|
@Requires({"read != null", "read.getAlignmentStart() != -1", "read.getCigar() != null"})
|
||||||
public AlignmentStateMachine(final SAMRecord read) {
|
public AlignmentStateMachine(final SAMRecord read) {
|
||||||
this.read = read;
|
this.read = read;
|
||||||
this.cigar = read.getCigar();
|
this.cigar = read.getCigar();
|
||||||
|
|
@ -86,28 +92,48 @@ class AlignmentStateMachine {
|
||||||
initializeAsLeftEdge();
|
initializeAsLeftEdge();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the state variables to put this machine one bp before the
|
||||||
|
* start of the alignment, so that a call to stepForwardOnGenome() will advance
|
||||||
|
* us to the first proper location
|
||||||
|
*/
|
||||||
|
@Ensures("isLeftEdge()")
|
||||||
private void initializeAsLeftEdge() {
|
private void initializeAsLeftEdge() {
|
||||||
readOffset = offsetIntoCurrentCigarElement = genomeOffset = -1;
|
readOffset = offsetIntoCurrentCigarElement = genomeOffset = -1;
|
||||||
currentElement = null;
|
currentElement = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the read we are aligning to the genome
|
||||||
|
* @return a non-null GATKSAMRecord
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
public SAMRecord getRead() {
|
public SAMRecord getRead() {
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this an edge state? I.e., one that is before or after the current read?
|
* Is this the left edge state? I.e., one that is before or after the current read?
|
||||||
* @return true if this state is an edge state, false otherwise
|
* @return true if this state is an edge state, false otherwise
|
||||||
*/
|
*/
|
||||||
public boolean isEdge() {
|
public boolean isLeftEdge() {
|
||||||
return readOffset == -1;
|
return readOffset == -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are we on the right edge? I.e., is the current state off the right of the alignment?
|
||||||
|
* @return true if off the right edge, false if otherwise
|
||||||
|
*/
|
||||||
|
public boolean isRightEdge() {
|
||||||
|
return readOffset == read.getReadLength();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* What is our current offset in the read's bases that aligns us with the reference genome?
|
* What is our current offset in the read's bases that aligns us with the reference genome?
|
||||||
*
|
*
|
||||||
* @return the current read offset position
|
* @return the current read offset position. If an edge will be == -1
|
||||||
*/
|
*/
|
||||||
|
@Ensures("result >= -1")
|
||||||
public int getReadOffset() {
|
public int getReadOffset() {
|
||||||
return readOffset;
|
return readOffset;
|
||||||
}
|
}
|
||||||
|
|
@ -115,39 +141,96 @@ class AlignmentStateMachine {
|
||||||
/**
|
/**
|
||||||
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
|
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
|
||||||
*
|
*
|
||||||
* @return the current offset
|
* @return the current offset from the alignment start on the genome. If this state is
|
||||||
|
* at the left edge the result will be -1;
|
||||||
*/
|
*/
|
||||||
|
@Ensures("result >= -1")
|
||||||
public int getGenomeOffset() {
|
public int getGenomeOffset() {
|
||||||
return genomeOffset;
|
return genomeOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the position (1-based as standard) of the current alignment on the genome w.r.t. the read's alignment start
|
||||||
|
* @return the position on the genome of the current state in absolute coordinates
|
||||||
|
*/
|
||||||
|
@Ensures("result > 0")
|
||||||
public int getGenomePosition() {
|
public int getGenomePosition() {
|
||||||
return read.getAlignmentStart() + getGenomeOffset();
|
return read.getAlignmentStart() + getGenomeOffset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets #getGenomePosition but as a 1 bp GenomeLoc
|
||||||
|
* @param genomeLocParser the parser to use to create the genome loc
|
||||||
|
* @return a non-null genome location with start position of getGenomePosition
|
||||||
|
*/
|
||||||
|
@Requires("genomeLocParser != null")
|
||||||
|
@Ensures("result != null")
|
||||||
public GenomeLoc getLocation(final GenomeLocParser genomeLocParser) {
|
public GenomeLoc getLocation(final GenomeLocParser genomeLocParser) {
|
||||||
|
// TODO -- may return wonky results if on an edge (could be 0 or could be beyond genome location)
|
||||||
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the cigar element we're currently aligning with.
|
||||||
|
*
|
||||||
|
* For example, if the cigar string is 2M2D2M and we're in the second step of the
|
||||||
|
* first 2M, then this function returns the element 2M. After calling stepForwardOnGenome
|
||||||
|
* this function would return 2D.
|
||||||
|
*
|
||||||
|
* @return the cigar element, or null if we're the left edge
|
||||||
|
*/
|
||||||
|
@Ensures("result != null || isLeftEdge() || isRightEdge()")
|
||||||
public CigarElement getCurrentCigarElement() {
|
public CigarElement getCurrentCigarElement() {
|
||||||
return currentElement;
|
return currentElement;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the offset of the current cigar element among all cigar elements in the read
|
||||||
|
*
|
||||||
|
* Suppose our read's cigar is 1M2D3M, and we're at the first 1M. This would
|
||||||
|
* return 0. Stepping forward puts us in the 2D, so our offset is 1. Another
|
||||||
|
* step forward would result in a 1 again (we're in the second position of the 2D).
|
||||||
|
* Finally, one more step forward brings us to 2 (for the 3M element)
|
||||||
|
*
|
||||||
|
* @return the offset of the current cigar element in the reads's cigar. Will return -1 for
|
||||||
|
* when the state is on the left edge, and be == the number of cigar elements in the
|
||||||
|
* read when we're past the last position on the genome
|
||||||
|
*/
|
||||||
|
@Ensures({"result >= -1", "result <= nCigarElements"})
|
||||||
public int getCurrentCigarElementOffset() {
|
public int getCurrentCigarElementOffset() {
|
||||||
return currentCigarElementOffset;
|
return currentCigarElementOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the offset of the current state into the current cigar element
|
||||||
|
*
|
||||||
|
* That is, suppose we have a read with cigar 2M3D4M, and we're right at
|
||||||
|
* the second M position. offsetIntoCurrentCigarElement would be 1, as
|
||||||
|
* it's two elements into the 2M cigar. Now stepping forward we'd be
|
||||||
|
* in cigar element 3D, and our offsetIntoCurrentCigarElement would be 0.
|
||||||
|
*
|
||||||
|
* @return the offset (from 0) of the current state in the current cigar element.
|
||||||
|
* Will be 0 on the right edge, and -1 on the left.
|
||||||
|
*/
|
||||||
|
@Ensures({"result >= 0 || (result == -1 && isLeftEdge())", "!isRightEdge() || result == 0"})
|
||||||
public int getOffsetIntoCurrentCigarElement() {
|
public int getOffsetIntoCurrentCigarElement() {
|
||||||
return offsetIntoCurrentCigarElement;
|
return offsetIntoCurrentCigarElement;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Convenience accessor of the CigarOperator of the current cigar element
|
||||||
|
*
|
||||||
|
* Robust to the case where we're on the edge, and currentElement is null, in which
|
||||||
|
* case this function returns null as well
|
||||||
|
*
|
||||||
* @return null if this is an edge state
|
* @return null if this is an edge state
|
||||||
*/
|
*/
|
||||||
|
@Ensures("result != null || isLeftEdge() || isRightEdge()")
|
||||||
public CigarOperator getCigarOperator() {
|
public CigarOperator getCigarOperator() {
|
||||||
return currentElement == null ? null : currentElement.getOperator();
|
return currentElement == null ? null : currentElement.getOperator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("%s ro=%d go=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, offsetIntoCurrentCigarElement, currentElement);
|
return String.format("%s ro=%d go=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, offsetIntoCurrentCigarElement, currentElement);
|
||||||
}
|
}
|
||||||
|
|
@ -158,6 +241,29 @@ class AlignmentStateMachine {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Step the state machine forward one unit
|
||||||
|
*
|
||||||
|
* Takes the current state of this machine, and advances the state until the next on-genome
|
||||||
|
* cigar element (M, X, =, D) is encountered, at which point this function returns with the
|
||||||
|
* cigar operator of the current element.
|
||||||
|
*
|
||||||
|
* Assumes that the AlignmentStateMachine is in the left edge state at the start, so that
|
||||||
|
* stepForwardOnGenome() can be called to move the machine to the first alignment position. That
|
||||||
|
* is, the normal use of this code is:
|
||||||
|
*
|
||||||
|
* AlignmentStateMachine machine = new AlignmentStateMachine(read)
|
||||||
|
* machine.stepForwardOnGenome()
|
||||||
|
* // now the machine is at the first position on the genome
|
||||||
|
*
|
||||||
|
* When stepForwardOnGenome() advances off the right edge of the read, the state machine is
|
||||||
|
* left in a state such that isRightEdge() returns true and returns null, indicating the
|
||||||
|
* the machine cannot advance further. The machine may explode, though this is not contracted,
|
||||||
|
* if stepForwardOnGenome() is called after a previous call returned null.
|
||||||
|
*
|
||||||
|
* @return the operator of the cigar element that machine stopped at, null if we advanced off the end of the read
|
||||||
|
*/
|
||||||
|
@Ensures("result != null || isRightEdge()")
|
||||||
public CigarOperator stepForwardOnGenome() {
|
public CigarOperator stepForwardOnGenome() {
|
||||||
// loop until we either find a cigar element step that moves us one base on the genome, or we run
|
// loop until we either find a cigar element step that moves us one base on the genome, or we run
|
||||||
// out of cigar elements
|
// out of cigar elements
|
||||||
|
|
@ -177,11 +283,17 @@ class AlignmentStateMachine {
|
||||||
if (currentElement != null && currentElement.getOperator() == CigarOperator.D)
|
if (currentElement != null && currentElement.getOperator() == CigarOperator.D)
|
||||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||||
|
|
||||||
|
// we're done, so set the offset of the cigar to 0 for cleanliness, as well as the current element
|
||||||
|
offsetIntoCurrentCigarElement = 0;
|
||||||
|
readOffset = read.getReadLength();
|
||||||
|
currentElement = null;
|
||||||
|
|
||||||
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
||||||
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
||||||
// current offset of this read is the next ref base after the end of the indel. This position will
|
// current offset of this read is the next ref base after the end of the indel. This position will
|
||||||
// model a point on the reference somewhere after the end of the read.
|
// model a point on the reference somewhere after the end of the read.
|
||||||
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
||||||
|
|
||||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,12 @@
|
||||||
package org.broadinstitute.sting.utils.locusiterator;
|
package org.broadinstitute.sting.utils.locusiterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created with IntelliJ IDEA.
|
* Simple wrapper about the information LIBS needs about downsampling
|
||||||
* User: depristo
|
*
|
||||||
* Date: 1/5/13
|
* User: depristo
|
||||||
* Time: 1:26 PM
|
* Date: 1/5/13
|
||||||
* To change this template use File | Settings | File Templates.
|
* Time: 1:26 PM
|
||||||
*/
|
*/
|
||||||
public class LIBSDownsamplingInfo {
|
public class LIBSDownsamplingInfo {
|
||||||
public final static LIBSDownsamplingInfo NO_DOWNSAMPLING = new LIBSDownsamplingInfo(false, -1);
|
public final static LIBSDownsamplingInfo NO_DOWNSAMPLING = new LIBSDownsamplingInfo(false, -1);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@
|
||||||
package org.broadinstitute.sting.utils.locusiterator;
|
package org.broadinstitute.sting.utils.locusiterator;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -69,12 +70,16 @@ import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||||
|
*
|
||||||
|
* Produces AlignmentContext objects, that contain ReadBackedPileups of PileupElements. This
|
||||||
|
* class has its core job of converting an iterator of ordered SAMRecords into those
|
||||||
|
* RBPs.
|
||||||
*/
|
*/
|
||||||
public class LocusIteratorByState extends LocusIterator {
|
public class LocusIteratorByState extends LocusIterator {
|
||||||
/**
|
/**
|
||||||
* our log, which we want to capture anything from this class
|
* our log, which we want to capture anything from this class
|
||||||
*/
|
*/
|
||||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
private final static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -83,13 +88,32 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used to create new GenomeLocs.
|
* Used to create new GenomeLocs as needed
|
||||||
*/
|
*/
|
||||||
private final GenomeLocParser genomeLocParser;
|
private final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A complete list of all samples that may come out of the reads. Must be
|
||||||
|
* comprehensive.
|
||||||
|
*/
|
||||||
private final ArrayList<String> samples;
|
private final ArrayList<String> samples;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The system that maps incoming reads from the iterator to their pileup states
|
||||||
|
*/
|
||||||
private final ReadStateManager readStates;
|
private final ReadStateManager readStates;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should we include reads in the pileup which are aligned with a deletion operator to the reference?
|
||||||
|
*/
|
||||||
private final boolean includeReadsWithDeletionAtLoci;
|
private final boolean includeReadsWithDeletionAtLoci;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The next alignment context. A non-null value means that a
|
||||||
|
* context is waiting from hasNext() for sending off to the next next() call. A null
|
||||||
|
* value means that either hasNext() has not been called at all or that
|
||||||
|
* the underlying iterator is exhausted
|
||||||
|
*/
|
||||||
private AlignmentContext nextAlignmentContext;
|
private AlignmentContext nextAlignmentContext;
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -98,6 +122,18 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new LocusIteratorByState
|
||||||
|
*
|
||||||
|
* @param samIterator the iterator of reads to process into pileups. Reads must be ordered
|
||||||
|
* according to standard coordinate-sorted BAM conventions
|
||||||
|
* @param readInformation meta-information about how to process the reads (i.e., should we do downsampling?)
|
||||||
|
* @param genomeLocParser used to create genome locs
|
||||||
|
* @param samples a complete list of samples present in the read groups for the reads coming from samIterator.
|
||||||
|
* This is generally just the set of read group sample fields in the SAMFileHeader. This
|
||||||
|
* list of samples may contain a null element, and all reads without read groups will
|
||||||
|
* be mapped to this null sample
|
||||||
|
*/
|
||||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||||
final ReadProperties readInformation,
|
final ReadProperties readInformation,
|
||||||
final GenomeLocParser genomeLocParser,
|
final GenomeLocParser genomeLocParser,
|
||||||
|
|
@ -116,16 +152,21 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
final GenomeLocParser genomeLocParser,
|
final GenomeLocParser genomeLocParser,
|
||||||
final Collection<String> samples,
|
final Collection<String> samples,
|
||||||
final boolean maintainUniqueReadsList) {
|
final boolean maintainUniqueReadsList) {
|
||||||
|
if ( samIterator == null ) throw new IllegalArgumentException("samIterator cannot be null");
|
||||||
|
if ( downsamplingInfo == null ) throw new IllegalArgumentException("downsamplingInfo cannot be null");
|
||||||
|
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
|
||||||
|
if ( samples == null ) throw new IllegalArgumentException("Samples cannot be null");
|
||||||
|
|
||||||
|
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||||
|
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||||
|
if (samples.isEmpty() && samIterator.hasNext()) {
|
||||||
|
throw new IllegalArgumentException("samples list must not be empty");
|
||||||
|
}
|
||||||
|
|
||||||
this.genomeLocParser = genomeLocParser;
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||||
this.samples = new ArrayList<String>(samples);
|
this.samples = new ArrayList<String>(samples);
|
||||||
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList);
|
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList);
|
||||||
|
|
||||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
|
||||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
|
||||||
if (this.samples.isEmpty() && samIterator.hasNext()) {
|
|
||||||
throw new IllegalArgumentException("samples list must not be empty");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -133,16 +174,14 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public void close() {
|
* Get the current location (i.e., the bp of the center of the pileup) of the pileup, or null if not anywhere yet
|
||||||
}
|
*
|
||||||
|
* Assumes that read states is updated to reflect the current pileup position, but not advanced to the
|
||||||
@Override
|
* next location.
|
||||||
public boolean hasNext() {
|
*
|
||||||
lazyLoadNextAlignmentContext();
|
* @return the location of the current pileup, or null if we're after all reads
|
||||||
return nextAlignmentContext != null;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
private GenomeLoc getLocation() {
|
private GenomeLoc getLocation() {
|
||||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
|
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
|
||||||
}
|
}
|
||||||
|
|
@ -153,6 +192,22 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is there another pileup available?
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
lazyLoadNextAlignmentContext();
|
||||||
|
return nextAlignmentContext != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the next AlignmentContext available from the reads.
|
||||||
|
*
|
||||||
|
* @return a non-null AlignmentContext of the pileup after to the next genomic position covered by
|
||||||
|
* at least one read.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public AlignmentContext next() {
|
public AlignmentContext next() {
|
||||||
lazyLoadNextAlignmentContext();
|
lazyLoadNextAlignmentContext();
|
||||||
|
|
@ -164,8 +219,9 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates the next alignment context from the given state. Note that this is implemented as a lazy load method.
|
* Creates the next alignment context from the given state. Note that this is implemented as a
|
||||||
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
|
* lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the
|
||||||
|
* next entry.
|
||||||
*/
|
*/
|
||||||
private void lazyLoadNextAlignmentContext() {
|
private void lazyLoadNextAlignmentContext() {
|
||||||
while (nextAlignmentContext == null && readStates.hasNext()) {
|
while (nextAlignmentContext == null && readStates.hasNext()) {
|
||||||
|
|
@ -193,7 +249,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!filterBaseInRead(read, location.getStart())) {
|
if (!dontIncludeReadInPileup(read, location.getStart())) {
|
||||||
if ( op == CigarOperator.D ) {
|
if ( op == CigarOperator.D ) {
|
||||||
if ( ! includeReadsWithDeletionAtLoci )
|
if ( ! includeReadsWithDeletionAtLoci )
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -220,6 +276,10 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advances all fo the read states by one bp. After this call the read states are reflective
|
||||||
|
* of the next pileup.
|
||||||
|
*/
|
||||||
private void updateReadStates() {
|
private void updateReadStates() {
|
||||||
for (final String sample : samples) {
|
for (final String sample : samples) {
|
||||||
Iterator<AlignmentStateMachine> it = readStates.iterator(sample);
|
Iterator<AlignmentStateMachine> it = readStates.iterator(sample);
|
||||||
|
|
@ -288,13 +348,16 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Should this read be excluded from the pileup?
|
||||||
|
*
|
||||||
* Generic place to put per-base filters appropriate to LocusIteratorByState
|
* Generic place to put per-base filters appropriate to LocusIteratorByState
|
||||||
*
|
*
|
||||||
* @param rec
|
* @param rec the read to potentially exclude
|
||||||
* @param pos
|
* @param pos the genomic position of the current alignment
|
||||||
* @return
|
* @return true if the read should be excluded from the pileup, false otherwise
|
||||||
*/
|
*/
|
||||||
private boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
|
@Requires({"rec != null", "pos > 0"})
|
||||||
|
private boolean dontIncludeReadInPileup(GATKSAMRecord rec, long pos) {
|
||||||
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
return ReadUtils.isBaseInsideAdaptor(rec, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -311,6 +374,8 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
* @param readInfo GATK engine information about what should be done to the reads
|
* @param readInfo GATK engine information about what should be done to the reads
|
||||||
* @return a LIBS specific info holder about downsampling only
|
* @return a LIBS specific info holder about downsampling only
|
||||||
*/
|
*/
|
||||||
|
@Requires("readInfo != null")
|
||||||
|
@Ensures("result != null")
|
||||||
private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) {
|
private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) {
|
||||||
final boolean performDownsampling = readInfo.getDownsamplingMethod() != null &&
|
final boolean performDownsampling = readInfo.getDownsamplingMethod() != null &&
|
||||||
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
|
||||||
// return new Object[][]{{new LIBSTest("2M2D2X", 2)}};
|
// return new Object[][]{{new LIBSTest("2M2D2X", 2)}};
|
||||||
// return createLIBSTests(
|
// return createLIBSTests(
|
||||||
// Arrays.asList(2),
|
// Arrays.asList(2),
|
||||||
// Arrays.asList(5));
|
// Arrays.asList(2));
|
||||||
return createLIBSTests(
|
return createLIBSTests(
|
||||||
Arrays.asList(1, 2),
|
Arrays.asList(1, 2),
|
||||||
Arrays.asList(1, 2, 3, 4));
|
Arrays.asList(1, 2, 3, 4));
|
||||||
|
|
@ -63,15 +63,23 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
|
||||||
int lastOffset = -1;
|
int lastOffset = -1;
|
||||||
|
|
||||||
// TODO -- more tests about test state machine state before first step?
|
// TODO -- more tests about test state machine state before first step?
|
||||||
Assert.assertTrue(state.isEdge());
|
Assert.assertTrue(state.isLeftEdge());
|
||||||
|
Assert.assertNull(state.getCigarOperator());
|
||||||
|
Assert.assertNotNull(state.toString());
|
||||||
|
Assert.assertEquals(state.getReadOffset(), -1);
|
||||||
|
Assert.assertEquals(state.getGenomeOffset(), -1);
|
||||||
|
Assert.assertEquals(state.getCurrentCigarElementOffset(), -1);
|
||||||
|
Assert.assertEquals(state.getCurrentCigarElement(), null);
|
||||||
|
|
||||||
while ( state.stepForwardOnGenome() != null ) {
|
while ( state.stepForwardOnGenome() != null ) {
|
||||||
|
Assert.assertNotNull(state.toString());
|
||||||
|
|
||||||
tester.stepForwardOnGenome();
|
tester.stepForwardOnGenome();
|
||||||
|
|
||||||
Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset());
|
Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset());
|
||||||
Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
|
Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
|
||||||
|
|
||||||
Assert.assertFalse(state.isEdge());
|
Assert.assertFalse(state.isLeftEdge());
|
||||||
|
|
||||||
Assert.assertEquals(state.getCurrentCigarElement(), read.getCigar().getCigarElement(tester.currentOperatorIndex), "CigarElement index failure");
|
Assert.assertEquals(state.getCurrentCigarElement(), read.getCigar().getCigarElement(tester.currentOperatorIndex), "CigarElement index failure");
|
||||||
Assert.assertEquals(state.getOffsetIntoCurrentCigarElement(), tester.getCurrentPositionOnOperatorBase0(), "CigarElement index failure");
|
Assert.assertEquals(state.getOffsetIntoCurrentCigarElement(), tester.getCurrentPositionOnOperatorBase0(), "CigarElement index failure");
|
||||||
|
|
@ -91,5 +99,9 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
|
||||||
|
Assert.assertEquals(state.getReadOffset(), read.getReadLength());
|
||||||
|
Assert.assertEquals(state.getCurrentCigarElementOffset(), read.getCigarLength());
|
||||||
|
Assert.assertEquals(state.getCurrentCigarElement(), null);
|
||||||
|
Assert.assertNotNull(state.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
||||||
new ValidationExclusion(),
|
new ValidationExclusion(),
|
||||||
Collections.<ReadFilter>emptyList(),
|
Collections.<ReadFilter>emptyList(),
|
||||||
Collections.<ReadTransformer>emptyList(),
|
Collections.<ReadTransformer>emptyList(),
|
||||||
false,
|
true,
|
||||||
(byte) -1,
|
(byte) -1,
|
||||||
keepReads);
|
keepReads);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -50,9 +50,10 @@ import java.util.*;
|
||||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||||
*/
|
*/
|
||||||
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
|
private static final boolean DEBUG = false;
|
||||||
protected LocusIteratorByState li;
|
protected LocusIteratorByState li;
|
||||||
|
|
||||||
@Test
|
@Test(enabled = true && ! DEBUG)
|
||||||
public void testXandEQOperators() {
|
public void testXandEQOperators() {
|
||||||
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||||
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
|
||||||
|
|
@ -92,7 +93,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true && ! DEBUG)
|
||||||
public void testIndelsInRegularPileup() {
|
public void testIndelsInRegularPileup() {
|
||||||
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||||
final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
|
final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
|
||||||
|
|
@ -138,7 +139,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
Assert.assertTrue(foundIndel,"Indel in pileup not found");
|
Assert.assertTrue(foundIndel,"Indel in pileup not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false && ! DEBUG)
|
||||||
public void testWholeIndelReadInIsolation() {
|
public void testWholeIndelReadInIsolation() {
|
||||||
final int firstLocus = 44367789;
|
final int firstLocus = 44367789;
|
||||||
|
|
||||||
|
|
@ -169,7 +170,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do
|
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do
|
||||||
* not negatively influence the ordering of the pileup.
|
* not negatively influence the ordering of the pileup.
|
||||||
*/
|
*/
|
||||||
@Test(enabled = true)
|
@Test(enabled = true && ! DEBUG)
|
||||||
public void testWholeIndelRead() {
|
public void testWholeIndelRead() {
|
||||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||||
|
|
||||||
|
|
@ -220,7 +221,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
/**
|
/**
|
||||||
* Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
|
* Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
|
||||||
*/
|
*/
|
||||||
@Test(enabled = false)
|
@Test(enabled = false && ! DEBUG)
|
||||||
public void testWholeIndelReadRepresentedTest() {
|
public void testWholeIndelReadRepresentedTest() {
|
||||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||||
|
|
||||||
|
|
@ -298,7 +299,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "IndelLengthAndBasesTest")
|
@Test(enabled = true && ! DEBUG, dataProvider = "IndelLengthAndBasesTest")
|
||||||
public void testIndelLengthAndBasesTest(GATKSAMRecord read, final CigarOperator op, final int eventSize, final String eventBases) {
|
public void testIndelLengthAndBasesTest(GATKSAMRecord read, final CigarOperator op, final int eventSize, final String eventBases) {
|
||||||
// create the iterator by state with the fake reads and fake records
|
// create the iterator by state with the fake reads and fake records
|
||||||
li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
|
li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
|
||||||
|
|
@ -337,7 +338,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
public Object[][] makeLIBSTest() {
|
public Object[][] makeLIBSTest() {
|
||||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||||
|
|
||||||
// tests.add(new Object[]{new LIBSTest("1X2D2P2X", 1)});
|
// tests.add(new Object[]{new LIBSTest("2=2D2=2X", 1)});
|
||||||
// return tests.toArray(new Object[][]{});
|
// return tests.toArray(new Object[][]{});
|
||||||
|
|
||||||
return createLIBSTests(
|
return createLIBSTests(
|
||||||
|
|
@ -349,7 +350,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
// Arrays.asList(3));
|
// Arrays.asList(3));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false, dataProvider = "LIBSTest")
|
@Test(enabled = true, dataProvider = "LIBSTest")
|
||||||
public void testLIBS(LIBSTest params) {
|
public void testLIBS(LIBSTest params) {
|
||||||
// create the iterator by state with the fake reads and fake records
|
// create the iterator by state with the fake reads and fake records
|
||||||
final GATKSAMRecord read = params.makeRead();
|
final GATKSAMRecord read = params.makeRead();
|
||||||
|
|
@ -366,19 +367,19 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
Assert.assertEquals(p.getNumberOfElements(), 1);
|
Assert.assertEquals(p.getNumberOfElements(), 1);
|
||||||
PileupElement pe = p.iterator().next();
|
PileupElement pe = p.iterator().next();
|
||||||
|
|
||||||
Assert.assertEquals(p.getNumberOfDeletions(), pe.isDeletion() ? 1 : 0);
|
Assert.assertEquals(p.getNumberOfDeletions(), pe.isDeletion() ? 1 : 0, "wrong number of deletions in the pileup");
|
||||||
Assert.assertEquals(p.getNumberOfMappingQualityZeroReads(), pe.getRead().getMappingQuality() == 0 ? 1 : 0);
|
Assert.assertEquals(p.getNumberOfMappingQualityZeroReads(), pe.getRead().getMappingQuality() == 0 ? 1 : 0, "wront number of mapq reads in the pileup");
|
||||||
|
|
||||||
tester.stepForwardOnGenome();
|
tester.stepForwardOnGenome();
|
||||||
|
|
||||||
if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) {
|
if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) {
|
||||||
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
|
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart, "before deletion start failure");
|
||||||
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
|
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd, "after deletion end failure");
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
|
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion, "before insertion failure");
|
||||||
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
|
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion, "after insertion failure");
|
||||||
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
|
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip, "next to soft clip failure");
|
||||||
|
|
||||||
Assert.assertTrue(pe.getOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + pe.getOffset());
|
Assert.assertTrue(pe.getOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + pe.getOffset());
|
||||||
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
|
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
|
||||||
|
|
@ -391,7 +392,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
Assert.assertTrue(pe.getOffsetInCurrentCigar() >= 0, "Offset into current cigar too small");
|
Assert.assertTrue(pe.getOffsetInCurrentCigar() >= 0, "Offset into current cigar too small");
|
||||||
Assert.assertTrue(pe.getOffsetInCurrentCigar() < pe.getCurrentCigarElement().getLength(), "Offset into current cigar too big");
|
Assert.assertTrue(pe.getOffsetInCurrentCigar() < pe.getCurrentCigarElement().getLength(), "Offset into current cigar too big");
|
||||||
|
|
||||||
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset());
|
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset(), "Read offset failure");
|
||||||
lastOffset = pe.getOffset();
|
lastOffset = pe.getOffset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -431,7 +432,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads")
|
@Test(enabled = true && ! DEBUG, dataProvider = "LIBSKeepSubmittedReads")
|
||||||
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
|
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
|
||||||
final int nLoci,
|
final int nLoci,
|
||||||
final int nSamples,
|
final int nSamples,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue