Contracts and documentation for AlignmentStateMachine and LocusIteratorByState

-- Add more unit tests for both as well
This commit is contained in:
Mark DePristo 2013-01-09 15:14:42 -05:00
parent cc1d259cac
commit 2f2a592c8e
6 changed files with 251 additions and 61 deletions

View File

@ -25,6 +25,9 @@
package org.broadinstitute.sting.utils.locusiterator; package org.broadinstitute.sting.utils.locusiterator;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
@ -40,16 +43,18 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
* implements the traversal along the reference; thus stepForwardOnGenome() returns * implements the traversal along the reference; thus stepForwardOnGenome() returns
* on every and only on actual reference bases. This can be a (mis)match or a deletion * on every and only on actual reference bases. This can be a (mis)match or a deletion
* (in the latter case, we still return on every individual reference base the deletion spans). * (in the latter case, we still return on every individual reference base the deletion spans).
* In the extended events mode, the record state also remembers if there was an insertion, or
* if the deletion just started *right before* the current reference base the record state is
* pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome()
* will clear that memory (as we remember only extended events immediately preceding
* the current reference base).
* *
* User: depristo * User: depristo
* Date: 1/5/13 * Date: 1/5/13
* Time: 1:08 PM * Time: 1:08 PM
*/ */
@Invariant({
"nCigarElements >= 0",
"cigar != null",
"read != null",
"currentCigarElementOffset >= -1",
"currentCigarElementOffset <= nCigarElements"
})
class AlignmentStateMachine { class AlignmentStateMachine {
/** /**
* Our read * Our read
@ -79,6 +84,7 @@ class AlignmentStateMachine {
*/ */
private int offsetIntoCurrentCigarElement; private int offsetIntoCurrentCigarElement;
@Requires({"read != null", "read.getAlignmentStart() != -1", "read.getCigar() != null"})
public AlignmentStateMachine(final SAMRecord read) { public AlignmentStateMachine(final SAMRecord read) {
this.read = read; this.read = read;
this.cigar = read.getCigar(); this.cigar = read.getCigar();
@ -86,28 +92,48 @@ class AlignmentStateMachine {
initializeAsLeftEdge(); initializeAsLeftEdge();
} }
/**
* Initialize the state variables to put this machine one bp before the
* start of the alignment, so that a call to stepForwardOnGenome() will advance
* us to the first proper location
*/
@Ensures("isLeftEdge()")
private void initializeAsLeftEdge() { private void initializeAsLeftEdge() {
readOffset = offsetIntoCurrentCigarElement = genomeOffset = -1; readOffset = offsetIntoCurrentCigarElement = genomeOffset = -1;
currentElement = null; currentElement = null;
} }
/**
* Get the read we are aligning to the genome
* @return a non-null GATKSAMRecord
*/
@Ensures("result != null")
public SAMRecord getRead() { public SAMRecord getRead() {
return read; return read;
} }
/** /**
* Is this an edge state? I.e., one that is before or after the current read? * Is this the left edge state? I.e., one that is before or after the current read?
* @return true if this state is an edge state, false otherwise * @return true if this state is an edge state, false otherwise
*/ */
public boolean isEdge() { public boolean isLeftEdge() {
return readOffset == -1; return readOffset == -1;
} }
/**
* Are we on the right edge? I.e., is the current state off the right of the alignment?
* @return true if off the right edge, false if otherwise
*/
public boolean isRightEdge() {
return readOffset == read.getReadLength();
}
/** /**
* What is our current offset in the read's bases that aligns us with the reference genome? * What is our current offset in the read's bases that aligns us with the reference genome?
* *
* @return the current read offset position * @return the current read offset position. If an edge will be == -1
*/ */
@Ensures("result >= -1")
public int getReadOffset() { public int getReadOffset() {
return readOffset; return readOffset;
} }
@ -115,39 +141,96 @@ class AlignmentStateMachine {
/** /**
* What is the current offset w.r.t. the alignment state that aligns us to the readOffset? * What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
* *
* @return the current offset * @return the current offset from the alignment start on the genome. If this state is
* at the left edge the result will be -1;
*/ */
@Ensures("result >= -1")
public int getGenomeOffset() { public int getGenomeOffset() {
return genomeOffset; return genomeOffset;
} }
/**
* Get the position (1-based as standard) of the current alignment on the genome w.r.t. the read's alignment start
* @return the position on the genome of the current state in absolute coordinates
*/
@Ensures("result > 0")
public int getGenomePosition() { public int getGenomePosition() {
return read.getAlignmentStart() + getGenomeOffset(); return read.getAlignmentStart() + getGenomeOffset();
} }
/**
* Gets #getGenomePosition but as a 1 bp GenomeLoc
* @param genomeLocParser the parser to use to create the genome loc
* @return a non-null genome location with start position of getGenomePosition
*/
@Requires("genomeLocParser != null")
@Ensures("result != null")
public GenomeLoc getLocation(final GenomeLocParser genomeLocParser) { public GenomeLoc getLocation(final GenomeLocParser genomeLocParser) {
// TODO -- may return wonky results if on an edge (could be 0 or could be beyond genome location)
return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
} }
/**
* Get the cigar element we're currently aligning with.
*
* For example, if the cigar string is 2M2D2M and we're in the second step of the
* first 2M, then this function returns the element 2M. After calling stepForwardOnGenome
* this function would return 2D.
*
* @return the cigar element, or null if we're the left edge
*/
@Ensures("result != null || isLeftEdge() || isRightEdge()")
public CigarElement getCurrentCigarElement() { public CigarElement getCurrentCigarElement() {
return currentElement; return currentElement;
} }
/**
* Get the offset of the current cigar element among all cigar elements in the read
*
* Suppose our read's cigar is 1M2D3M, and we're at the first 1M. This would
* return 0. Stepping forward puts us in the 2D, so our offset is 1. Another
* step forward would result in a 1 again (we're in the second position of the 2D).
* Finally, one more step forward brings us to 2 (for the 3M element)
*
* @return the offset of the current cigar element in the reads's cigar. Will return -1 for
* when the state is on the left edge, and be == the number of cigar elements in the
* read when we're past the last position on the genome
*/
@Ensures({"result >= -1", "result <= nCigarElements"})
public int getCurrentCigarElementOffset() { public int getCurrentCigarElementOffset() {
return currentCigarElementOffset; return currentCigarElementOffset;
} }
/**
* Get the offset of the current state into the current cigar element
*
* That is, suppose we have a read with cigar 2M3D4M, and we're right at
* the second M position. offsetIntoCurrentCigarElement would be 1, as
* it's two elements into the 2M cigar. Now stepping forward we'd be
* in cigar element 3D, and our offsetIntoCurrentCigarElement would be 0.
*
* @return the offset (from 0) of the current state in the current cigar element.
* Will be 0 on the right edge, and -1 on the left.
*/
@Ensures({"result >= 0 || (result == -1 && isLeftEdge())", "!isRightEdge() || result == 0"})
public int getOffsetIntoCurrentCigarElement() { public int getOffsetIntoCurrentCigarElement() {
return offsetIntoCurrentCigarElement; return offsetIntoCurrentCigarElement;
} }
/** /**
* Convenience accessor of the CigarOperator of the current cigar element
*
* Robust to the case where we're on the edge, and currentElement is null, in which
* case this function returns null as well
*
* @return null if this is an edge state * @return null if this is an edge state
*/ */
@Ensures("result != null || isLeftEdge() || isRightEdge()")
public CigarOperator getCigarOperator() { public CigarOperator getCigarOperator() {
return currentElement == null ? null : currentElement.getOperator(); return currentElement == null ? null : currentElement.getOperator();
} }
@Override
public String toString() { public String toString() {
return String.format("%s ro=%d go=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, offsetIntoCurrentCigarElement, currentElement); return String.format("%s ro=%d go=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, offsetIntoCurrentCigarElement, currentElement);
} }
@ -158,6 +241,29 @@ class AlignmentStateMachine {
// //
// ----------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------
/**
* Step the state machine forward one unit
*
* Takes the current state of this machine, and advances the state until the next on-genome
* cigar element (M, X, =, D) is encountered, at which point this function returns with the
* cigar operator of the current element.
*
* Assumes that the AlignmentStateMachine is in the left edge state at the start, so that
* stepForwardOnGenome() can be called to move the machine to the first alignment position. That
* is, the normal use of this code is:
*
* AlignmentStateMachine machine = new AlignmentStateMachine(read)
* machine.stepForwardOnGenome()
* // now the machine is at the first position on the genome
*
* When stepForwardOnGenome() advances off the right edge of the read, the state machine is
* left in a state such that isRightEdge() returns true and returns null, indicating the
* the machine cannot advance further. The machine may explode, though this is not contracted,
* if stepForwardOnGenome() is called after a previous call returned null.
*
* @return the operator of the cigar element that machine stopped at, null if we advanced off the end of the read
*/
@Ensures("result != null || isRightEdge()")
public CigarOperator stepForwardOnGenome() { public CigarOperator stepForwardOnGenome() {
// loop until we either find a cigar element step that moves us one base on the genome, or we run // loop until we either find a cigar element step that moves us one base on the genome, or we run
// out of cigar elements // out of cigar elements
@ -177,11 +283,17 @@ class AlignmentStateMachine {
if (currentElement != null && currentElement.getOperator() == CigarOperator.D) if (currentElement != null && currentElement.getOperator() == CigarOperator.D)
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// we're done, so set the offset of the cigar to 0 for cleanliness, as well as the current element
offsetIntoCurrentCigarElement = 0;
readOffset = read.getReadLength();
currentElement = null;
// Reads that contain indels model the genomeOffset as the following base in the reference. Because // Reads that contain indels model the genomeOffset as the following base in the reference. Because
// we fall into this else block only when indels end the read, increment genomeOffset such that the // we fall into this else block only when indels end the read, increment genomeOffset such that the
// current offset of this read is the next ref base after the end of the indel. This position will // current offset of this read is the next ref base after the end of the indel. This position will
// model a point on the reference somewhere after the end of the read. // model a point on the reference somewhere after the end of the read.
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here: genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
// we do step forward on the ref, and by returning null we also indicate that we are past the read end. // we do step forward on the ref, and by returning null we also indicate that we are past the read end.
return null; return null;
} }

View File

@ -26,12 +26,12 @@
package org.broadinstitute.sting.utils.locusiterator; package org.broadinstitute.sting.utils.locusiterator;
/** /**
* Created with IntelliJ IDEA. * Simple wrapper about the information LIBS needs about downsampling
* User: depristo *
* Date: 1/5/13 * User: depristo
* Time: 1:26 PM * Date: 1/5/13
* To change this template use File | Settings | File Templates. * Time: 1:26 PM
*/ */
public class LIBSDownsamplingInfo { public class LIBSDownsamplingInfo {
public final static LIBSDownsamplingInfo NO_DOWNSAMPLING = new LIBSDownsamplingInfo(false, -1); public final static LIBSDownsamplingInfo NO_DOWNSAMPLING = new LIBSDownsamplingInfo(false, -1);

View File

@ -52,6 +52,7 @@
package org.broadinstitute.sting.utils.locusiterator; package org.broadinstitute.sting.utils.locusiterator;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@ -69,12 +70,16 @@ import java.util.*;
/** /**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis * Iterator that traverses a SAM File, accumulating information on a per-locus basis
*
* Produces AlignmentContext objects, that contain ReadBackedPileups of PileupElements. This
* class has its core job of converting an iterator of ordered SAMRecords into those
* RBPs.
*/ */
public class LocusIteratorByState extends LocusIterator { public class LocusIteratorByState extends LocusIterator {
/** /**
* our log, which we want to capture anything from this class * our log, which we want to capture anything from this class
*/ */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class); private final static Logger logger = Logger.getLogger(LocusIteratorByState.class);
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
// //
@ -83,13 +88,32 @@ public class LocusIteratorByState extends LocusIterator {
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
/** /**
* Used to create new GenomeLocs. * Used to create new GenomeLocs as needed
*/ */
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
/**
* A complete list of all samples that may come out of the reads. Must be
* comprehensive.
*/
private final ArrayList<String> samples; private final ArrayList<String> samples;
/**
* The system that maps incoming reads from the iterator to their pileup states
*/
private final ReadStateManager readStates; private final ReadStateManager readStates;
/**
* Should we include reads in the pileup which are aligned with a deletion operator to the reference?
*/
private final boolean includeReadsWithDeletionAtLoci; private final boolean includeReadsWithDeletionAtLoci;
/**
* The next alignment context. A non-null value means that a
* context is waiting from hasNext() for sending off to the next next() call. A null
* value means that either hasNext() has not been called at all or that
* the underlying iterator is exhausted
*/
private AlignmentContext nextAlignmentContext; private AlignmentContext nextAlignmentContext;
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
@ -98,6 +122,18 @@ public class LocusIteratorByState extends LocusIterator {
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
/**
* Create a new LocusIteratorByState
*
* @param samIterator the iterator of reads to process into pileups. Reads must be ordered
* according to standard coordinate-sorted BAM conventions
* @param readInformation meta-information about how to process the reads (i.e., should we do downsampling?)
* @param genomeLocParser used to create genome locs
* @param samples a complete list of samples present in the read groups for the reads coming from samIterator.
* This is generally just the set of read group sample fields in the SAMFileHeader. This
* list of samples may contain a null element, and all reads without read groups will
* be mapped to this null sample
*/
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
final ReadProperties readInformation, final ReadProperties readInformation,
final GenomeLocParser genomeLocParser, final GenomeLocParser genomeLocParser,
@ -116,16 +152,21 @@ public class LocusIteratorByState extends LocusIterator {
final GenomeLocParser genomeLocParser, final GenomeLocParser genomeLocParser,
final Collection<String> samples, final Collection<String> samples,
final boolean maintainUniqueReadsList) { final boolean maintainUniqueReadsList) {
if ( samIterator == null ) throw new IllegalArgumentException("samIterator cannot be null");
if ( downsamplingInfo == null ) throw new IllegalArgumentException("downsamplingInfo cannot be null");
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
if ( samples == null ) throw new IllegalArgumentException("Samples cannot be null");
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if (samples.isEmpty() && samIterator.hasNext()) {
throw new IllegalArgumentException("samples list must not be empty");
}
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.samples = new ArrayList<String>(samples); this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList); this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList);
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if (this.samples.isEmpty() && samIterator.hasNext()) {
throw new IllegalArgumentException("samples list must not be empty");
}
} }
@Override @Override
@ -133,16 +174,14 @@ public class LocusIteratorByState extends LocusIterator {
return this; return this;
} }
@Override /**
public void close() { * Get the current location (i.e., the bp of the center of the pileup) of the pileup, or null if not anywhere yet
} *
* Assumes that read states is updated to reflect the current pileup position, but not advanced to the
@Override * next location.
public boolean hasNext() { *
lazyLoadNextAlignmentContext(); * @return the location of the current pileup, or null if we're after all reads
return nextAlignmentContext != null; */
}
private GenomeLoc getLocation() { private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser); return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
} }
@ -153,6 +192,22 @@ public class LocusIteratorByState extends LocusIterator {
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
/**
* Is there another pileup available?
* @return
*/
@Override
public boolean hasNext() {
lazyLoadNextAlignmentContext();
return nextAlignmentContext != null;
}
/**
* Get the next AlignmentContext available from the reads.
*
* @return a non-null AlignmentContext of the pileup after to the next genomic position covered by
* at least one read.
*/
@Override @Override
public AlignmentContext next() { public AlignmentContext next() {
lazyLoadNextAlignmentContext(); lazyLoadNextAlignmentContext();
@ -164,8 +219,9 @@ public class LocusIteratorByState extends LocusIterator {
} }
/** /**
* Creates the next alignment context from the given state. Note that this is implemented as a lazy load method. * Creates the next alignment context from the given state. Note that this is implemented as a
* nextAlignmentContext MUST BE null in order for this method to advance to the next entry. * lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the
* next entry.
*/ */
private void lazyLoadNextAlignmentContext() { private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) { while (nextAlignmentContext == null && readStates.hasNext()) {
@ -193,7 +249,7 @@ public class LocusIteratorByState extends LocusIterator {
if (op == CigarOperator.N) // N's are never added to any pileup if (op == CigarOperator.N) // N's are never added to any pileup
continue; continue;
if (!filterBaseInRead(read, location.getStart())) { if (!dontIncludeReadInPileup(read, location.getStart())) {
if ( op == CigarOperator.D ) { if ( op == CigarOperator.D ) {
if ( ! includeReadsWithDeletionAtLoci ) if ( ! includeReadsWithDeletionAtLoci )
continue; continue;
@ -220,6 +276,10 @@ public class LocusIteratorByState extends LocusIterator {
} }
} }
/**
* Advances all fo the read states by one bp. After this call the read states are reflective
* of the next pileup.
*/
private void updateReadStates() { private void updateReadStates() {
for (final String sample : samples) { for (final String sample : samples) {
Iterator<AlignmentStateMachine> it = readStates.iterator(sample); Iterator<AlignmentStateMachine> it = readStates.iterator(sample);
@ -288,13 +348,16 @@ public class LocusIteratorByState extends LocusIterator {
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
/** /**
* Should this read be excluded from the pileup?
*
* Generic place to put per-base filters appropriate to LocusIteratorByState * Generic place to put per-base filters appropriate to LocusIteratorByState
* *
* @param rec * @param rec the read to potentially exclude
* @param pos * @param pos the genomic position of the current alignment
* @return * @return true if the read should be excluded from the pileup, false otherwise
*/ */
private boolean filterBaseInRead(GATKSAMRecord rec, long pos) { @Requires({"rec != null", "pos > 0"})
private boolean dontIncludeReadInPileup(GATKSAMRecord rec, long pos) {
return ReadUtils.isBaseInsideAdaptor(rec, pos); return ReadUtils.isBaseInsideAdaptor(rec, pos);
} }
@ -311,6 +374,8 @@ public class LocusIteratorByState extends LocusIterator {
* @param readInfo GATK engine information about what should be done to the reads * @param readInfo GATK engine information about what should be done to the reads
* @return a LIBS specific info holder about downsampling only * @return a LIBS specific info holder about downsampling only
*/ */
@Requires("readInfo != null")
@Ensures("result != null")
private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) { private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) {
final boolean performDownsampling = readInfo.getDownsamplingMethod() != null && final boolean performDownsampling = readInfo.getDownsamplingMethod() != null &&
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE && readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&

View File

@ -41,7 +41,7 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
// return new Object[][]{{new LIBSTest("2M2D2X", 2)}}; // return new Object[][]{{new LIBSTest("2M2D2X", 2)}};
// return createLIBSTests( // return createLIBSTests(
// Arrays.asList(2), // Arrays.asList(2),
// Arrays.asList(5)); // Arrays.asList(2));
return createLIBSTests( return createLIBSTests(
Arrays.asList(1, 2), Arrays.asList(1, 2),
Arrays.asList(1, 2, 3, 4)); Arrays.asList(1, 2, 3, 4));
@ -63,15 +63,23 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
int lastOffset = -1; int lastOffset = -1;
// TODO -- more tests about test state machine state before first step? // TODO -- more tests about test state machine state before first step?
Assert.assertTrue(state.isEdge()); Assert.assertTrue(state.isLeftEdge());
Assert.assertNull(state.getCigarOperator());
Assert.assertNotNull(state.toString());
Assert.assertEquals(state.getReadOffset(), -1);
Assert.assertEquals(state.getGenomeOffset(), -1);
Assert.assertEquals(state.getCurrentCigarElementOffset(), -1);
Assert.assertEquals(state.getCurrentCigarElement(), null);
while ( state.stepForwardOnGenome() != null ) { while ( state.stepForwardOnGenome() != null ) {
Assert.assertNotNull(state.toString());
tester.stepForwardOnGenome(); tester.stepForwardOnGenome();
Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset()); Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset());
Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited); Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
Assert.assertFalse(state.isEdge()); Assert.assertFalse(state.isLeftEdge());
Assert.assertEquals(state.getCurrentCigarElement(), read.getCigar().getCigarElement(tester.currentOperatorIndex), "CigarElement index failure"); Assert.assertEquals(state.getCurrentCigarElement(), read.getCigar().getCigarElement(tester.currentOperatorIndex), "CigarElement index failure");
Assert.assertEquals(state.getOffsetIntoCurrentCigarElement(), tester.getCurrentPositionOnOperatorBase0(), "CigarElement index failure"); Assert.assertEquals(state.getOffsetIntoCurrentCigarElement(), tester.getCurrentPositionOnOperatorBase0(), "CigarElement index failure");
@ -91,5 +99,9 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
} }
Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
Assert.assertEquals(state.getReadOffset(), read.getReadLength());
Assert.assertEquals(state.getCurrentCigarElementOffset(), read.getCigarLength());
Assert.assertEquals(state.getCurrentCigarElement(), null);
Assert.assertNotNull(state.toString());
} }
} }

View File

@ -90,7 +90,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
new ValidationExclusion(), new ValidationExclusion(),
Collections.<ReadFilter>emptyList(), Collections.<ReadFilter>emptyList(),
Collections.<ReadTransformer>emptyList(), Collections.<ReadTransformer>emptyList(),
false, true,
(byte) -1, (byte) -1,
keepReads); keepReads);
} }

View File

@ -50,9 +50,10 @@ import java.util.*;
* testing of the new (non-legacy) version of LocusIteratorByState * testing of the new (non-legacy) version of LocusIteratorByState
*/ */
public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
private static final boolean DEBUG = false;
protected LocusIteratorByState li; protected LocusIteratorByState li;
@Test @Test(enabled = true && ! DEBUG)
public void testXandEQOperators() { public void testXandEQOperators() {
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'}; final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
@ -92,7 +93,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
} }
} }
@Test(enabled = true) @Test(enabled = true && ! DEBUG)
public void testIndelsInRegularPileup() { public void testIndelsInRegularPileup() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'}; final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
@ -138,7 +139,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
Assert.assertTrue(foundIndel,"Indel in pileup not found"); Assert.assertTrue(foundIndel,"Indel in pileup not found");
} }
@Test(enabled = false) @Test(enabled = false && ! DEBUG)
public void testWholeIndelReadInIsolation() { public void testWholeIndelReadInIsolation() {
final int firstLocus = 44367789; final int firstLocus = 44367789;
@ -169,7 +170,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do * Test to make sure that reads supporting only an indel (example cigar string: 76I) do
* not negatively influence the ordering of the pileup. * not negatively influence the ordering of the pileup.
*/ */
@Test(enabled = true) @Test(enabled = true && ! DEBUG)
public void testWholeIndelRead() { public void testWholeIndelRead() {
final int firstLocus = 44367788, secondLocus = firstLocus + 1; final int firstLocus = 44367788, secondLocus = firstLocus + 1;
@ -220,7 +221,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
/** /**
* Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
*/ */
@Test(enabled = false) @Test(enabled = false && ! DEBUG)
public void testWholeIndelReadRepresentedTest() { public void testWholeIndelReadRepresentedTest() {
final int firstLocus = 44367788, secondLocus = firstLocus + 1; final int firstLocus = 44367788, secondLocus = firstLocus + 1;
@ -298,7 +299,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@Test(dataProvider = "IndelLengthAndBasesTest") @Test(enabled = true && ! DEBUG, dataProvider = "IndelLengthAndBasesTest")
public void testIndelLengthAndBasesTest(GATKSAMRecord read, final CigarOperator op, final int eventSize, final String eventBases) { public void testIndelLengthAndBasesTest(GATKSAMRecord read, final CigarOperator op, final int eventSize, final String eventBases) {
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties()); li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
@ -337,7 +338,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
public Object[][] makeLIBSTest() { public Object[][] makeLIBSTest() {
final List<Object[]> tests = new LinkedList<Object[]>(); final List<Object[]> tests = new LinkedList<Object[]>();
// tests.add(new Object[]{new LIBSTest("1X2D2P2X", 1)}); // tests.add(new Object[]{new LIBSTest("2=2D2=2X", 1)});
// return tests.toArray(new Object[][]{}); // return tests.toArray(new Object[][]{});
return createLIBSTests( return createLIBSTests(
@ -349,7 +350,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
// Arrays.asList(3)); // Arrays.asList(3));
} }
@Test(enabled = false, dataProvider = "LIBSTest") @Test(enabled = true, dataProvider = "LIBSTest")
public void testLIBS(LIBSTest params) { public void testLIBS(LIBSTest params) {
// create the iterator by state with the fake reads and fake records // create the iterator by state with the fake reads and fake records
final GATKSAMRecord read = params.makeRead(); final GATKSAMRecord read = params.makeRead();
@ -366,19 +367,19 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
Assert.assertEquals(p.getNumberOfElements(), 1); Assert.assertEquals(p.getNumberOfElements(), 1);
PileupElement pe = p.iterator().next(); PileupElement pe = p.iterator().next();
Assert.assertEquals(p.getNumberOfDeletions(), pe.isDeletion() ? 1 : 0); Assert.assertEquals(p.getNumberOfDeletions(), pe.isDeletion() ? 1 : 0, "wrong number of deletions in the pileup");
Assert.assertEquals(p.getNumberOfMappingQualityZeroReads(), pe.getRead().getMappingQuality() == 0 ? 1 : 0); Assert.assertEquals(p.getNumberOfMappingQualityZeroReads(), pe.getRead().getMappingQuality() == 0 ? 1 : 0, "wront number of mapq reads in the pileup");
tester.stepForwardOnGenome(); tester.stepForwardOnGenome();
if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) { if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) {
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart); Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart, "before deletion start failure");
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd); Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd, "after deletion end failure");
} }
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion); Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion, "before insertion failure");
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion); Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion, "after insertion failure");
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip); Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip, "next to soft clip failure");
Assert.assertTrue(pe.getOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + pe.getOffset()); Assert.assertTrue(pe.getOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + pe.getOffset());
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited); Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited);
@ -391,7 +392,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
Assert.assertTrue(pe.getOffsetInCurrentCigar() >= 0, "Offset into current cigar too small"); Assert.assertTrue(pe.getOffsetInCurrentCigar() >= 0, "Offset into current cigar too small");
Assert.assertTrue(pe.getOffsetInCurrentCigar() < pe.getCurrentCigarElement().getLength(), "Offset into current cigar too big"); Assert.assertTrue(pe.getOffsetInCurrentCigar() < pe.getCurrentCigarElement().getLength(), "Offset into current cigar too big");
Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset()); Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset(), "Read offset failure");
lastOffset = pe.getOffset(); lastOffset = pe.getOffset();
} }
@ -431,7 +432,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads") @Test(enabled = true && ! DEBUG, dataProvider = "LIBSKeepSubmittedReads")
public void testLIBSKeepSubmittedReads(final int nReadsPerLocus, public void testLIBSKeepSubmittedReads(final int nReadsPerLocus,
final int nLoci, final int nLoci,
final int nSamples, final int nSamples,