Near final cleanup of PileupElement
-- All functions documented and unit tested -- New constructor interface -- Cleanup some uses of old / removed functionality
This commit is contained in:
parent
fb9eb3d4ee
commit
8b83f4d6c7
|
|
@ -169,8 +169,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
|||
* @return true if this base is part of a meaningful read for comparison, false otherwise
|
||||
*/
|
||||
public static boolean isUsableBase(final PileupElement p, final boolean allowDeletions) {
|
||||
return !(p.isInsertionAtBeginningOfRead() ||
|
||||
(! allowDeletions && p.isDeletion()) ||
|
||||
return !((! allowDeletions && p.isDeletion()) ||
|
||||
p.getMappingQual() == 0 ||
|
||||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
|
||||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here
|
||||
|
|
|
|||
|
|
@ -323,22 +323,12 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
|
|||
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
|
||||
final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
|
||||
for( final PileupElement PE : pileup ) {
|
||||
final PileupElement newPE = new BAQedPileupElement( PE );
|
||||
final PileupElement newPE = new SNPGenotypeLikelihoodsCalculationModel.BAQedPileupElement( PE );
|
||||
BAQedElements.add( newPE );
|
||||
}
|
||||
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
|
||||
}
|
||||
|
||||
public class BAQedPileupElement extends PileupElement {
|
||||
public BAQedPileupElement( final PileupElement PE ) {
|
||||
super(PE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Helper function that returns the phred-scaled base quality score we should use for calculating
|
||||
* likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may
|
||||
|
|
|
|||
|
|
@ -252,7 +252,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
for (PileupElement p : pileup) {
|
||||
if (p.isDeletion() || p.isInsertionAtBeginningOfRead() || BaseUtils.isRegularBase(p.getBase()))
|
||||
if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()))
|
||||
count += p.getRepresentativeCount();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -241,7 +241,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
|
||||
public byte getQual() {
|
||||
if ( isDeletion() )
|
||||
return super.getQual();
|
||||
else
|
||||
return BAQ.calcBAQFromTag(getRead(), offset, true);
|
||||
}
|
||||
}
|
||||
|
||||
private static class SampleGenotypeData {
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
"currentCigarElementOffset >= -1",
|
||||
"currentCigarElementOffset <= nCigarElements"
|
||||
})
|
||||
class AlignmentStateMachine {
|
||||
public class AlignmentStateMachine {
|
||||
/**
|
||||
* Our read
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -25,6 +25,11 @@ public abstract class LocusIterator implements Iterable<AlignmentContext>, Close
|
|||
public abstract boolean hasNext();
|
||||
public abstract AlignmentContext next();
|
||||
|
||||
// TODO -- remove me when ART testing is done
|
||||
public LocusIteratorByState getLIBS() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,11 @@ import java.util.List;
|
|||
* Time: 8:54:05 AM
|
||||
*/
|
||||
public class PileupElement implements Comparable<PileupElement> {
|
||||
private final static LinkedList<CigarElement> EMPTY_LINKED_LIST = new LinkedList<CigarElement>();
|
||||
|
||||
private final static EnumSet<CigarOperator> ON_GENOME_OPERATORS =
|
||||
EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D);
|
||||
|
||||
public static final byte DELETION_BASE = BaseUtils.D;
|
||||
public static final byte DELETION_QUAL = (byte) 16;
|
||||
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
|
||||
|
|
@ -90,13 +95,34 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
currentCigarOffset = offsetInCurrentCigar = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new pileup element
|
||||
*
|
||||
* @param read a non-null read to pileup
|
||||
* @param baseOffset the offset into the read's base / qual vector aligned to this position on the genome. If the
|
||||
* current cigar element is a deletion, offset should be the offset of the last M/=/X position.
|
||||
* @param currentElement a non-null CigarElement that indicates the cigar element aligning the read to the genome
|
||||
* @param currentCigarOffset the offset of currentElement in read.getCigar().getElement(currentCigarOffset) == currentElement)
|
||||
* @param offsetInCurrentCigar how far into the currentElement are we in our alignment to the genome?
|
||||
*/
|
||||
public PileupElement(final GATKSAMRecord read, final int baseOffset,
|
||||
final CigarElement currentElement, final int currentCigarOffset, final int offsetInCurrentCigar) {
|
||||
final CigarElement currentElement, final int currentCigarOffset,
|
||||
final int offsetInCurrentCigar) {
|
||||
assert currentElement != null;
|
||||
|
||||
this.read = read;
|
||||
this.offset = baseOffset;
|
||||
this.currentCigarElement = currentElement;
|
||||
this.currentCigarOffset = currentCigarOffset;
|
||||
this.offsetInCurrentCigar = offsetInCurrentCigar;
|
||||
|
||||
// for performance regions these are assertions
|
||||
assert this.read != null;
|
||||
assert this.offset >= 0 && this.offset < this.read.getReadLength();
|
||||
assert this.currentCigarOffset >= 0;
|
||||
assert this.currentCigarOffset < read.getCigarLength();
|
||||
assert this.offsetInCurrentCigar >= 0;
|
||||
assert this.offsetInCurrentCigar < currentElement.getLength();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -112,50 +138,100 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
throw new UnsupportedOperationException("please use LocusIteratorByState.createPileupForReadAndOffset instead");
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this element a deletion w.r.t. the reference genome?
|
||||
*
|
||||
* @return true if this is a deletion, false otherwise
|
||||
*/
|
||||
public boolean isDeletion() {
|
||||
return currentCigarElement.getOperator() == CigarOperator.D;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the current element immediately before a deletion, but itself not a deletion?
|
||||
*
|
||||
* Suppose we are aligning a read with cigar 3M2D1M. This function is true
|
||||
* if we are in the last cigar position of the 3M, but not if we are in the 2D itself.
|
||||
*
|
||||
* @return true if the next alignment position is a deletion w.r.t. the reference genome
|
||||
*/
|
||||
public boolean isBeforeDeletionStart() {
|
||||
return isBeforeDeletion() && ! isDeletion();
|
||||
return ! isDeletion() && atEndOfCurrentCigar() && hasOperator(getNextOnGenomeCigarElement(), CigarOperator.D);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the current element immediately after a deletion, but itself not a deletion?
|
||||
*
|
||||
* Suppose we are aligning a read with cigar 1M2D3M. This function is true
|
||||
* if we are in the first cigar position of the 3M, but not if we are in the 2D itself or
|
||||
* in any but the first position of the 3M.
|
||||
*
|
||||
* @return true if the previous alignment position is a deletion w.r.t. the reference genome
|
||||
*/
|
||||
public boolean isAfterDeletionEnd() {
|
||||
return isAfterDeletion() && ! isDeletion();
|
||||
}
|
||||
|
||||
public boolean isInsertionAtBeginningOfRead() {
|
||||
return offset == -1;
|
||||
return ! isDeletion() && atStartOfCurrentCigar() && hasOperator(getPreviousOnGenomeCigarElement(), CigarOperator.D);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the read for this pileup element
|
||||
* @return a non-null GATKSAMRecord
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public GATKSAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
@Ensures("result == offset")
|
||||
/**
|
||||
* Get the offset of the this element into the read that aligns that read's base to this genomic position.
|
||||
*
|
||||
* If the current element is a deletion then offset is the offset of the last base containing offset.
|
||||
*
|
||||
* @return a valid offset into the read's bases
|
||||
*/
|
||||
@Ensures({"result >= 0", "result <= read.getReadLength()"})
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base aligned to the genome at this location
|
||||
*
|
||||
* If the current element is a deletion returns DELETION_BASE
|
||||
*
|
||||
* @return a base encoded as a byte
|
||||
*/
|
||||
@Ensures("result != DELETION_BASE || (isDeletion() && result == DELETION_BASE)")
|
||||
public byte getBase() {
|
||||
return getBase(offset);
|
||||
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public int getBaseIndex() {
|
||||
return getBaseIndex(offset);
|
||||
return BaseUtils.simpleBaseToBaseIndex(getBase());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base quality score of the base at this aligned position on the genome
|
||||
* @return a phred-scaled quality score as a byte
|
||||
*/
|
||||
public byte getQual() {
|
||||
return getQual(offset);
|
||||
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Base Insertion quality at this pileup position
|
||||
* @return a phred-scaled quality score as a byte
|
||||
*/
|
||||
public byte getBaseInsertionQual() {
|
||||
return getBaseInsertionQual(offset);
|
||||
return isDeletion() ? DELETION_QUAL : read.getBaseInsertionQualities()[offset];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Base Deletion quality at this pileup position
|
||||
* @return a phred-scaled quality score as a byte
|
||||
*/
|
||||
public byte getBaseDeletionQual() {
|
||||
return getBaseDeletionQual(offset);
|
||||
return isDeletion() ? DELETION_QUAL : read.getBaseDeletionQualities()[offset];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -222,6 +298,10 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the mapping quality of the read of this element
|
||||
* @return the mapping quality of the underlying SAM record
|
||||
*/
|
||||
public int getMappingQual() {
|
||||
return read.getMappingQuality();
|
||||
}
|
||||
|
|
@ -231,26 +311,6 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char) getBase(), getQual());
|
||||
}
|
||||
|
||||
protected byte getBase(final int offset) {
|
||||
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset];
|
||||
}
|
||||
|
||||
protected int getBaseIndex(final int offset) {
|
||||
return BaseUtils.simpleBaseToBaseIndex((isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]);
|
||||
}
|
||||
|
||||
protected byte getQual(final int offset) {
|
||||
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||
}
|
||||
|
||||
protected byte getBaseInsertionQual(final int offset) {
|
||||
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseInsertionQualities()[offset];
|
||||
}
|
||||
|
||||
protected byte getBaseDeletionQual(final int offset) {
|
||||
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseDeletionQualities()[offset];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final PileupElement pileupElement) {
|
||||
if (offset < pileupElement.offset)
|
||||
|
|
@ -281,44 +341,94 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
* @return
|
||||
*/
|
||||
public int getRepresentativeCount() {
|
||||
int representativeCount = 1;
|
||||
|
||||
if (read.isReducedRead() && !isInsertionAtBeginningOfRead()) {
|
||||
if (read.isReducedRead()) {
|
||||
if (isDeletion() && (offset + 1 >= read.getReadLength()) ) // deletion in the end of the read
|
||||
throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString()));
|
||||
|
||||
representativeCount = (isDeletion()) ? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0) : read.getReducedCount(offset);
|
||||
return isDeletion()
|
||||
? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0)
|
||||
: read.getReducedCount(offset);
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
return representativeCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the cigar element aligning this element to the genome
|
||||
* @return a non-null CigarElement
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public CigarElement getCurrentCigarElement() {
|
||||
return currentCigarElement;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the offset of this cigar element in the Cigar of the current read (0-based)
|
||||
*
|
||||
* Suppose the cigar is 1M2D3I4D. If we are in the 1M state this function returns
|
||||
* 0. If we are in 2D, the result is 1. If we are in the 4D, the result is 3.
|
||||
*
|
||||
* @return an offset into the read.getCigar() that brings us to the current cigar element
|
||||
*/
|
||||
public int getCurrentCigarOffset() {
|
||||
return currentCigarOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the offset into the *current* cigar element for this alignment position
|
||||
*
|
||||
* We can be anywhere from offset 0 (first position) to length - 1 of the current
|
||||
* cigar element aligning us to this genomic position.
|
||||
*
|
||||
* @return a valid offset into the current cigar element
|
||||
*/
|
||||
@Ensures({"result >= 0", "result < getCurrentCigarElement().getLength()"})
|
||||
public int getOffsetInCurrentCigar() {
|
||||
return offsetInCurrentCigar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the cigar elements that occur before the current position but after the previous position on the genome
|
||||
*
|
||||
* For example, if we are in the 3M state of 1M2I3M state then 2I occurs before this position.
|
||||
*
|
||||
* Note that this function does not care where we are in the current cigar element. In the previous
|
||||
* example this list of elements contains the 2I state regardless of where you are in the 3M.
|
||||
*
|
||||
* Note this returns the list of all elements that occur between this and the prev site, so for
|
||||
* example we might have 5S10I2M and this function would return [5S, 10I].
|
||||
*
|
||||
* @return a non-null list of CigarElements
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public LinkedList<CigarElement> getBetweenPrevPosition() {
|
||||
return atStartOfCurrentCigar() ? getBetween(-1) : EMPTY_LINKED_LIST;
|
||||
return atStartOfCurrentCigar() ? getBetween(Direction.PREV) : EMPTY_LINKED_LIST;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the cigar elements that occur after the current position but before the next position on the genome
|
||||
*
|
||||
* @see #getBetweenPrevPosition() for more details
|
||||
*
|
||||
* @return a non-null list of CigarElements
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public LinkedList<CigarElement> getBetweenNextPosition() {
|
||||
return atEndOfCurrentCigar() ? getBetween(1) : EMPTY_LINKED_LIST;
|
||||
return atEndOfCurrentCigar() ? getBetween(Direction.NEXT) : EMPTY_LINKED_LIST;
|
||||
}
|
||||
|
||||
// TODO -- can I make this unmodifable?
|
||||
private final static LinkedList<CigarElement> EMPTY_LINKED_LIST = new LinkedList<CigarElement>();
|
||||
/** for some helper functions */
|
||||
private enum Direction { PREV, NEXT }
|
||||
|
||||
private final static EnumSet<CigarOperator> ON_GENOME_OPERATORS =
|
||||
EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D);
|
||||
|
||||
private LinkedList<CigarElement> getBetween(final int increment) {
|
||||
/**
|
||||
* Helper function to get cigar elements between this and either the prev or next genomic position
|
||||
*
|
||||
* @param direction PREVIOUS if we want before, NEXT if we want after
|
||||
* @return a non-null list of cigar elements between this and the neighboring position in direction
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
private LinkedList<CigarElement> getBetween(final Direction direction) {
|
||||
final int increment = direction == Direction.NEXT ? 1 : -1;
|
||||
LinkedList<CigarElement> elements = null;
|
||||
final int nCigarElements = read.getCigarLength();
|
||||
for ( int i = currentCigarOffset + increment; i >= 0 && i < nCigarElements; i += increment) {
|
||||
|
|
@ -343,15 +453,42 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
return elements == null ? EMPTY_LINKED_LIST : elements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the cigar element of the previous genomic aligned position
|
||||
*
|
||||
* For example, we might have 1M2I3M, and be sitting at the someone in the 3M. This
|
||||
* function would return 1M, as the 2I isn't on the genome. Note this function skips
|
||||
* all of the positions that would occur in the current element. So the result
|
||||
* is always 1M regardless of whether we're in the first, second, or third position of the 3M
|
||||
* cigar.
|
||||
*
|
||||
* @return a CigarElement, or null (indicating that no previous element exists)
|
||||
*/
|
||||
@Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())")
|
||||
public CigarElement getPreviousOnGenomeCigarElement() {
|
||||
return getNeighboringOnGenomeCigarElement(-1);
|
||||
return getNeighboringOnGenomeCigarElement(Direction.PREV);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the cigar element of the next genomic aligned position
|
||||
*
|
||||
* @see #getPreviousOnGenomeCigarElement() for more details
|
||||
*
|
||||
* @return a CigarElement, or null (indicating that no next element exists)
|
||||
*/
|
||||
@Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())")
|
||||
public CigarElement getNextOnGenomeCigarElement() {
|
||||
return getNeighboringOnGenomeCigarElement(1);
|
||||
return getNeighboringOnGenomeCigarElement(Direction.NEXT);
|
||||
}
|
||||
|
||||
private CigarElement getNeighboringOnGenomeCigarElement(final int increment) {
|
||||
/**
|
||||
* Helper function to get the cigar element of the next or previous genomic position
|
||||
* @param direction the direction to look in
|
||||
* @return a CigarElement, or null if no such element exists
|
||||
*/
|
||||
@Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())")
|
||||
private CigarElement getNeighboringOnGenomeCigarElement(final Direction direction) {
|
||||
final int increment = direction == Direction.NEXT ? 1 : -1;
|
||||
final int nCigarElements = read.getCigarLength();
|
||||
|
||||
for ( int i = currentCigarOffset + increment; i >= 0 && i < nCigarElements; i += increment) {
|
||||
|
|
@ -364,31 +501,97 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the cigar element (which may be null) have operation toMatch?
|
||||
*
|
||||
* @param maybeCigarElement a CigarElement that might be null
|
||||
* @param toMatch a CigarOperator we want to match against the one in maybeCigarElement
|
||||
* @return true if maybeCigarElement isn't null and has operator toMatch
|
||||
*/
|
||||
@Requires("toMatch != null")
|
||||
private boolean hasOperator(final CigarElement maybeCigarElement, final CigarOperator toMatch) {
|
||||
return maybeCigarElement != null && maybeCigarElement.getOperator() == toMatch;
|
||||
}
|
||||
|
||||
public boolean isAfterDeletion() { return atStartOfCurrentCigar() && hasOperator(getPreviousOnGenomeCigarElement(), CigarOperator.D); }
|
||||
public boolean isBeforeDeletion() { return atEndOfCurrentCigar() && hasOperator(getNextOnGenomeCigarElement(), CigarOperator.D); }
|
||||
/**
|
||||
* Does an insertion occur immediately before the current position on the genome?
|
||||
*
|
||||
* @return true if yes, false if no
|
||||
*/
|
||||
public boolean isAfterInsertion() { return isAfter(getBetweenPrevPosition(), CigarOperator.I); }
|
||||
|
||||
/**
|
||||
* Does an insertion occur immediately after the current position on the genome?
|
||||
*
|
||||
* @return true if yes, false if no
|
||||
*/
|
||||
public boolean isBeforeInsertion() { return isBefore(getBetweenNextPosition(), CigarOperator.I); }
|
||||
|
||||
/**
|
||||
* Does a soft-clipping event occur immediately before the current position on the genome?
|
||||
*
|
||||
* @return true if yes, false if no
|
||||
*/
|
||||
public boolean isAfterSoftClip() { return isAfter(getBetweenPrevPosition(), CigarOperator.S); }
|
||||
|
||||
/**
|
||||
* Does a soft-clipping event occur immediately after the current position on the genome?
|
||||
*
|
||||
* @return true if yes, false if no
|
||||
*/
|
||||
public boolean isBeforeSoftClip() { return isBefore(getBetweenNextPosition(), CigarOperator.S); }
|
||||
|
||||
/**
|
||||
* Does a soft-clipping event occur immediately before or after the current position on the genome?
|
||||
*
|
||||
* @return true if yes, false if no
|
||||
*/
|
||||
public boolean isNextToSoftClip() { return isAfterSoftClip() || isBeforeSoftClip(); }
|
||||
|
||||
/**
|
||||
* Is the current position at the end of the current cigar?
|
||||
*
|
||||
* For example, if we are in element 3M, this function returns true if we are at offsetInCurrentCigar
|
||||
* of 2, but not 0 or 1.
|
||||
*
|
||||
* @return true if we're at the end of the current cigar
|
||||
*/
|
||||
public boolean atEndOfCurrentCigar() {
|
||||
return offsetInCurrentCigar == currentCigarElement.getLength() - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the current position at the start of the current cigar?
|
||||
*
|
||||
* For example, if we are in element 3M, this function returns true if we are at offsetInCurrentCigar
|
||||
* of 0, but not 1 or 2.
|
||||
*
|
||||
* @return true if we're at the start of the current cigar
|
||||
*/
|
||||
public boolean atStartOfCurrentCigar() {
|
||||
return offsetInCurrentCigar == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is op the last element in the list of elements?
|
||||
*
|
||||
* @param elements the elements to examine
|
||||
* @param op the op we want the last element's op to equal
|
||||
* @return true if op == last(elements).op
|
||||
*/
|
||||
@Requires({"elements != null", "op != null"})
|
||||
private boolean isAfter(final LinkedList<CigarElement> elements, final CigarOperator op) {
|
||||
return ! elements.isEmpty() && elements.peekLast().getOperator() == op;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is op the first element in the list of elements?
|
||||
*
|
||||
* @param elements the elements to examine
|
||||
* @param op the op we want the last element's op to equal
|
||||
* @return true if op == first(elements).op
|
||||
*/
|
||||
@Requires({"elements != null", "op != null"})
|
||||
private boolean isBefore(final List<CigarElement> elements, final CigarOperator op) {
|
||||
return ! elements.isEmpty() && elements.get(0).getOperator() == op;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -297,7 +297,7 @@ public class AlignmentUtils {
|
|||
}
|
||||
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
|
||||
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
|
||||
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), false, pileupElement.isDeletion(), alignmentStart, refLocus );
|
||||
}
|
||||
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) {
|
||||
|
|
|
|||
|
|
@ -123,24 +123,21 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
|
||||
protected static class LIBSTest {
|
||||
public static final int locus = 44367788;
|
||||
final String cigar;
|
||||
final String cigarString;
|
||||
final int readLength;
|
||||
final private List<CigarElement> elements;
|
||||
|
||||
public LIBSTest(final String cigar, final int readLength) {
|
||||
this(TextCigarCodec.getSingleton().decode(cigar).getCigarElements(), cigar, readLength);
|
||||
}
|
||||
|
||||
public LIBSTest(final List<CigarElement> elements, final String cigar, final int readLength) {
|
||||
this.elements = elements;
|
||||
this.cigar = cigar;
|
||||
this.readLength = readLength;
|
||||
public LIBSTest(final String cigarString) {
|
||||
final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString);
|
||||
this.cigarString = cigarString;
|
||||
this.elements = cigar.getCigarElements();
|
||||
this.readLength = cigar.getReadLength();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "LIBSTest{" +
|
||||
"cigar='" + cigar + '\'' +
|
||||
"cigar='" + cigarString + '\'' +
|
||||
", readLength=" + readLength +
|
||||
'}';
|
||||
}
|
||||
|
|
@ -156,7 +153,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
for ( int i = 0; i < readLength; i++ )
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
read.setBaseQualities(quals);
|
||||
read.setCigarString(cigar);
|
||||
read.setCigarString(cigarString);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -220,7 +217,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
! (last.getOperator() == CigarOperator.I || last.getOperator() == CigarOperator.S))
|
||||
return null;
|
||||
|
||||
return new LIBSTest(elements, cigar, len);
|
||||
return new LIBSTest(cigar);
|
||||
}
|
||||
|
||||
@DataProvider(name = "LIBSTest")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.locusiterator.AlignmentStateMachine;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LIBS_position;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class PileupElementUnitTest extends LocusIteratorByStateBaseTest {
|
||||
@DataProvider(name = "PileupElementTest")
|
||||
public Object[][] makePileupElementTest() {
|
||||
// return new Object[][]{{new LIBSTest("2X2D2P2X")}};
|
||||
// return createLIBSTests(
|
||||
// Arrays.asList(2),
|
||||
// Arrays.asList(2));
|
||||
return createLIBSTests(
|
||||
Arrays.asList(1, 2),
|
||||
Arrays.asList(1, 2, 3, 4));
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PileupElementTest")
|
||||
public void testPileupElementTest(LIBSTest params) {
|
||||
final GATKSAMRecord read = params.makeRead();
|
||||
final AlignmentStateMachine state = new AlignmentStateMachine(read);
|
||||
final LIBS_position tester = new LIBS_position(read);
|
||||
|
||||
while ( state.stepForwardOnGenome() != null ) {
|
||||
tester.stepForwardOnGenome();
|
||||
final PileupElement pe = state.makePileupElement();
|
||||
|
||||
Assert.assertEquals(pe.getRead(), read);
|
||||
Assert.assertEquals(pe.getMappingQual(), read.getMappingQuality());
|
||||
Assert.assertEquals(pe.getOffset(), state.getReadOffset());
|
||||
|
||||
Assert.assertEquals(pe.isDeletion(), state.getCigarOperator() == CigarOperator.D);
|
||||
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
|
||||
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
|
||||
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
|
||||
|
||||
if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) {
|
||||
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
|
||||
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Assert.assertEquals(pe.atEndOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == state.getCurrentCigarElement().getLength() - 1, "atEndOfCurrentCigar failed");
|
||||
Assert.assertEquals(pe.atStartOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == 0, "atStartOfCurrentCigar failed");
|
||||
|
||||
Assert.assertEquals(pe.getBase(), pe.isDeletion() ? PileupElement.DELETION_BASE : read.getReadBases()[state.getReadOffset()]);
|
||||
Assert.assertEquals(pe.getQual(), pe.isDeletion() ? PileupElement.DELETION_QUAL : read.getBaseQualities()[state.getReadOffset()]);
|
||||
|
||||
Assert.assertEquals(pe.getCurrentCigarElement(), state.getCurrentCigarElement());
|
||||
Assert.assertEquals(pe.getCurrentCigarOffset(), state.getCurrentCigarElementOffset());
|
||||
|
||||
// tested in libs
|
||||
//pe.getLengthOfImmediatelyFollowingIndel();
|
||||
//pe.getBasesOfImmediatelyFollowingInsertion();
|
||||
|
||||
// Don't test -- pe.getBaseIndex();
|
||||
if ( pe.atEndOfCurrentCigar() && state.getCurrentCigarElementOffset() < read.getCigarLength() - 1 ) {
|
||||
final CigarElement nextElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() + 1);
|
||||
if ( nextElement.getOperator() == CigarOperator.I ) {
|
||||
Assert.assertTrue(pe.getBetweenNextPosition().size() >= 1);
|
||||
Assert.assertEquals(pe.getBetweenNextPosition().get(0), nextElement);
|
||||
}
|
||||
if ( nextElement.getOperator() == CigarOperator.M ) {
|
||||
Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
|
||||
}
|
||||
} else {
|
||||
Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
|
||||
}
|
||||
|
||||
if ( pe.atStartOfCurrentCigar() && state.getCurrentCigarElementOffset() > 0 ) {
|
||||
final CigarElement prevElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() - 1);
|
||||
if ( prevElement.getOperator() == CigarOperator.I ) {
|
||||
Assert.assertTrue(pe.getBetweenPrevPosition().size() >= 1);
|
||||
Assert.assertEquals(pe.getBetweenPrevPosition().getLast(), prevElement);
|
||||
}
|
||||
if ( prevElement.getOperator() == CigarOperator.M ) {
|
||||
Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
|
||||
}
|
||||
} else {
|
||||
Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
|
||||
}
|
||||
|
||||
// TODO -- add meaningful tests
|
||||
pe.getBaseInsertionQual();
|
||||
pe.getBaseDeletionQual();
|
||||
pe.getRepresentativeCount();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@DataProvider(name = "PrevAndNextTest")
|
||||
public Object[][] makePrevAndNextTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final List<CigarOperator> operators = Arrays.asList(CigarOperator.I, CigarOperator.P, CigarOperator.S);
|
||||
|
||||
for ( final CigarOperator firstOp : Arrays.asList(CigarOperator.M) ) {
|
||||
for ( final CigarOperator lastOp : Arrays.asList(CigarOperator.M, CigarOperator.D) ) {
|
||||
for ( final int nIntermediate : Arrays.asList(1, 2, 3) ) {
|
||||
for ( final List<CigarOperator> combination : Utils.makePermutations(operators, nIntermediate, false) ) {
|
||||
final int readLength = 2 + combination.size();
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
||||
|
||||
String cigar = "1" + firstOp;
|
||||
for ( final CigarOperator op : combination ) cigar += "1" + op;
|
||||
cigar += "1" + lastOp;
|
||||
read.setCigarString(cigar);
|
||||
|
||||
tests.add(new Object[]{read, firstOp, lastOp, combination});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PrevAndNextTest")
|
||||
public void testPrevAndNextTest(final GATKSAMRecord read, final CigarOperator firstOp, final CigarOperator lastOp, final List<CigarOperator> ops) {
|
||||
final AlignmentStateMachine state = new AlignmentStateMachine(read);
|
||||
|
||||
state.stepForwardOnGenome();
|
||||
final PileupElement pe = state.makePileupElement();
|
||||
Assert.assertEquals(pe.getBetweenNextPosition().size(), ops.size());
|
||||
Assert.assertEquals(pe.getBetweenPrevPosition().size(), 0);
|
||||
assertEqualsOperators(pe.getBetweenNextPosition(), ops);
|
||||
Assert.assertEquals(pe.getPreviousOnGenomeCigarElement(), null);
|
||||
Assert.assertNotNull(pe.getNextOnGenomeCigarElement());
|
||||
Assert.assertEquals(pe.getNextOnGenomeCigarElement().getOperator(), lastOp);
|
||||
|
||||
state.stepForwardOnGenome();
|
||||
final PileupElement pe2 = state.makePileupElement();
|
||||
Assert.assertEquals(pe2.getBetweenPrevPosition().size(), ops.size());
|
||||
Assert.assertEquals(pe2.getBetweenNextPosition().size(), 0);
|
||||
assertEqualsOperators(pe2.getBetweenPrevPosition(), ops);
|
||||
Assert.assertNotNull(pe2.getPreviousOnGenomeCigarElement());
|
||||
Assert.assertEquals(pe2.getPreviousOnGenomeCigarElement().getOperator(), firstOp);
|
||||
Assert.assertEquals(pe2.getNextOnGenomeCigarElement(), null);
|
||||
}
|
||||
|
||||
private void assertEqualsOperators(final List<CigarElement> elements, final List<CigarOperator> ops) {
|
||||
for ( int i = 0; i < elements.size(); i++ ) {
|
||||
Assert.assertEquals(elements.get(i).getOperator(), ops.get(i), "elements doesn't have expected operator at position " + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue