Near final cleanup of PileupElement

-- All functions documented and unit tested
-- New constructor interface
-- Cleanup some uses of old / removed functionality
This commit is contained in:
Mark DePristo 2013-01-10 12:17:48 -05:00
parent fb9eb3d4ee
commit 8b83f4d6c7
10 changed files with 470 additions and 80 deletions

View File

@ -169,8 +169,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
* @return true if this base is part of a meaningful read for comparison, false otherwise
*/
public static boolean isUsableBase(final PileupElement p, final boolean allowDeletions) {
return !(p.isInsertionAtBeginningOfRead() ||
(! allowDeletions && p.isDeletion()) ||
return !((! allowDeletions && p.isDeletion()) ||
p.getMappingQual() == 0 ||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here

View File

@ -323,22 +323,12 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
for( final PileupElement PE : pileup ) {
final PileupElement newPE = new BAQedPileupElement( PE );
final PileupElement newPE = new SNPGenotypeLikelihoodsCalculationModel.BAQedPileupElement( PE );
BAQedElements.add( newPE );
}
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
}
public class BAQedPileupElement extends PileupElement {
public BAQedPileupElement( final PileupElement PE ) {
super(PE);
}
@Override
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
}
/**
* Helper function that returns the phred-scaled base quality score we should use for calculating
* likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may

View File

@ -252,7 +252,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for (PileupElement p : pileup) {
if (p.isDeletion() || p.isInsertionAtBeginningOfRead() || BaseUtils.isRegularBase(p.getBase()))
if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()))
count += p.getRepresentativeCount();
}

View File

@ -241,7 +241,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
}
@Override
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
public byte getQual() {
if ( isDeletion() )
return super.getQual();
else
return BAQ.calcBAQFromTag(getRead(), offset, true);
}
}
private static class SampleGenotypeData {

View File

@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
"currentCigarElementOffset >= -1",
"currentCigarElementOffset <= nCigarElements"
})
class AlignmentStateMachine {
public class AlignmentStateMachine {
/**
* Our read
*/

View File

@ -25,6 +25,11 @@ public abstract class LocusIterator implements Iterable<AlignmentContext>, Close
public abstract boolean hasNext();
public abstract AlignmentContext next();
// TODO -- remove me when ART testing is done
public LocusIteratorByState getLIBS() {
return null;
}
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}

View File

@ -47,6 +47,11 @@ import java.util.List;
* Time: 8:54:05 AM
*/
public class PileupElement implements Comparable<PileupElement> {
private final static LinkedList<CigarElement> EMPTY_LINKED_LIST = new LinkedList<CigarElement>();
private final static EnumSet<CigarOperator> ON_GENOME_OPERATORS =
EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D);
public static final byte DELETION_BASE = BaseUtils.D;
public static final byte DELETION_QUAL = (byte) 16;
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
@ -90,13 +95,34 @@ public class PileupElement implements Comparable<PileupElement> {
currentCigarOffset = offsetInCurrentCigar = -1;
}
/**
* Create a new pileup element
*
* @param read a non-null read to pileup
* @param baseOffset the offset into the read's base / qual vector aligned to this position on the genome. If the
* current cigar element is a deletion, offset should be the offset of the last M/=/X position.
* @param currentElement a non-null CigarElement that indicates the cigar element aligning the read to the genome
* @param currentCigarOffset the offset of currentElement in read.getCigar().getElement(currentCigarOffset) == currentElement)
* @param offsetInCurrentCigar how far into the currentElement are we in our alignment to the genome?
*/
public PileupElement(final GATKSAMRecord read, final int baseOffset,
final CigarElement currentElement, final int currentCigarOffset, final int offsetInCurrentCigar) {
final CigarElement currentElement, final int currentCigarOffset,
final int offsetInCurrentCigar) {
assert currentElement != null;
this.read = read;
this.offset = baseOffset;
this.currentCigarElement = currentElement;
this.currentCigarOffset = currentCigarOffset;
this.offsetInCurrentCigar = offsetInCurrentCigar;
// for performance regions these are assertions
assert this.read != null;
assert this.offset >= 0 && this.offset < this.read.getReadLength();
assert this.currentCigarOffset >= 0;
assert this.currentCigarOffset < read.getCigarLength();
assert this.offsetInCurrentCigar >= 0;
assert this.offsetInCurrentCigar < currentElement.getLength();
}
/**
@ -112,50 +138,100 @@ public class PileupElement implements Comparable<PileupElement> {
throw new UnsupportedOperationException("please use LocusIteratorByState.createPileupForReadAndOffset instead");
}
/**
* Is this element a deletion w.r.t. the reference genome?
*
* @return true if this is a deletion, false otherwise
*/
public boolean isDeletion() {
return currentCigarElement.getOperator() == CigarOperator.D;
}
/**
* Is the current element immediately before a deletion, but itself not a deletion?
*
* Suppose we are aligning a read with cigar 3M2D1M. This function is true
* if we are in the last cigar position of the 3M, but not if we are in the 2D itself.
*
* @return true if the next alignment position is a deletion w.r.t. the reference genome
*/
public boolean isBeforeDeletionStart() {
return isBeforeDeletion() && ! isDeletion();
return ! isDeletion() && atEndOfCurrentCigar() && hasOperator(getNextOnGenomeCigarElement(), CigarOperator.D);
}
/**
* Is the current element immediately after a deletion, but itself not a deletion?
*
* Suppose we are aligning a read with cigar 1M2D3M. This function is true
* if we are in the first cigar position of the 3M, but not if we are in the 2D itself or
* in any but the first position of the 3M.
*
* @return true if the previous alignment position is a deletion w.r.t. the reference genome
*/
public boolean isAfterDeletionEnd() {
return isAfterDeletion() && ! isDeletion();
}
public boolean isInsertionAtBeginningOfRead() {
return offset == -1;
return ! isDeletion() && atStartOfCurrentCigar() && hasOperator(getPreviousOnGenomeCigarElement(), CigarOperator.D);
}
/**
* Get the read for this pileup element
* @return a non-null GATKSAMRecord
*/
@Ensures("result != null")
public GATKSAMRecord getRead() {
return read;
}
@Ensures("result == offset")
/**
* Get the offset of the this element into the read that aligns that read's base to this genomic position.
*
* If the current element is a deletion then offset is the offset of the last base containing offset.
*
* @return a valid offset into the read's bases
*/
@Ensures({"result >= 0", "result <= read.getReadLength()"})
public int getOffset() {
return offset;
}
/**
* Get the base aligned to the genome at this location
*
* If the current element is a deletion returns DELETION_BASE
*
* @return a base encoded as a byte
*/
@Ensures("result != DELETION_BASE || (isDeletion() && result == DELETION_BASE)")
public byte getBase() {
return getBase(offset);
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
}
@Deprecated
public int getBaseIndex() {
return getBaseIndex(offset);
return BaseUtils.simpleBaseToBaseIndex(getBase());
}
/**
* Get the base quality score of the base at this aligned position on the genome
* @return a phred-scaled quality score as a byte
*/
public byte getQual() {
return getQual(offset);
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
}
/**
* Get the Base Insertion quality at this pileup position
* @return a phred-scaled quality score as a byte
*/
public byte getBaseInsertionQual() {
return getBaseInsertionQual(offset);
return isDeletion() ? DELETION_QUAL : read.getBaseInsertionQualities()[offset];
}
/**
* Get the Base Deletion quality at this pileup position
* @return a phred-scaled quality score as a byte
*/
public byte getBaseDeletionQual() {
return getBaseDeletionQual(offset);
return isDeletion() ? DELETION_QUAL : read.getBaseDeletionQualities()[offset];
}
/**
@ -222,6 +298,10 @@ public class PileupElement implements Comparable<PileupElement> {
return null;
}
/**
* Get the mapping quality of the read of this element
* @return the mapping quality of the underlying SAM record
*/
public int getMappingQual() {
return read.getMappingQuality();
}
@ -231,26 +311,6 @@ public class PileupElement implements Comparable<PileupElement> {
return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char) getBase(), getQual());
}
protected byte getBase(final int offset) {
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset];
}
protected int getBaseIndex(final int offset) {
return BaseUtils.simpleBaseToBaseIndex((isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]);
}
protected byte getQual(final int offset) {
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseQualities()[offset];
}
protected byte getBaseInsertionQual(final int offset) {
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseInsertionQualities()[offset];
}
protected byte getBaseDeletionQual(final int offset) {
return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseDeletionQualities()[offset];
}
@Override
public int compareTo(final PileupElement pileupElement) {
if (offset < pileupElement.offset)
@ -281,44 +341,94 @@ public class PileupElement implements Comparable<PileupElement> {
* @return
*/
public int getRepresentativeCount() {
int representativeCount = 1;
if (read.isReducedRead() && !isInsertionAtBeginningOfRead()) {
if (read.isReducedRead()) {
if (isDeletion() && (offset + 1 >= read.getReadLength()) ) // deletion in the end of the read
throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString()));
representativeCount = (isDeletion()) ? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0) : read.getReducedCount(offset);
return isDeletion()
? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0)
: read.getReducedCount(offset);
} else {
return 1;
}
return representativeCount;
}
/**
* Get the cigar element aligning this element to the genome
* @return a non-null CigarElement
*/
@Ensures("result != null")
public CigarElement getCurrentCigarElement() {
return currentCigarElement;
}
/**
* Get the offset of this cigar element in the Cigar of the current read (0-based)
*
* Suppose the cigar is 1M2D3I4D. If we are in the 1M state this function returns
* 0. If we are in 2D, the result is 1. If we are in the 4D, the result is 3.
*
* @return an offset into the read.getCigar() that brings us to the current cigar element
*/
public int getCurrentCigarOffset() {
return currentCigarOffset;
}
/**
* Get the offset into the *current* cigar element for this alignment position
*
* We can be anywhere from offset 0 (first position) to length - 1 of the current
* cigar element aligning us to this genomic position.
*
* @return a valid offset into the current cigar element
*/
@Ensures({"result >= 0", "result < getCurrentCigarElement().getLength()"})
public int getOffsetInCurrentCigar() {
return offsetInCurrentCigar;
}
/**
* Get the cigar elements that occur before the current position but after the previous position on the genome
*
* For example, if we are in the 3M state of 1M2I3M state then 2I occurs before this position.
*
* Note that this function does not care where we are in the current cigar element. In the previous
* example this list of elements contains the 2I state regardless of where you are in the 3M.
*
* Note this returns the list of all elements that occur between this and the prev site, so for
* example we might have 5S10I2M and this function would return [5S, 10I].
*
* @return a non-null list of CigarElements
*/
@Ensures("result != null")
public LinkedList<CigarElement> getBetweenPrevPosition() {
return atStartOfCurrentCigar() ? getBetween(-1) : EMPTY_LINKED_LIST;
return atStartOfCurrentCigar() ? getBetween(Direction.PREV) : EMPTY_LINKED_LIST;
}
/**
* Get the cigar elements that occur after the current position but before the next position on the genome
*
* @see #getBetweenPrevPosition() for more details
*
* @return a non-null list of CigarElements
*/
@Ensures("result != null")
public LinkedList<CigarElement> getBetweenNextPosition() {
return atEndOfCurrentCigar() ? getBetween(1) : EMPTY_LINKED_LIST;
return atEndOfCurrentCigar() ? getBetween(Direction.NEXT) : EMPTY_LINKED_LIST;
}
// TODO -- can I make this unmodifable?
private final static LinkedList<CigarElement> EMPTY_LINKED_LIST = new LinkedList<CigarElement>();
/** for some helper functions */
private enum Direction { PREV, NEXT }
private final static EnumSet<CigarOperator> ON_GENOME_OPERATORS =
EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D);
private LinkedList<CigarElement> getBetween(final int increment) {
/**
* Helper function to get cigar elements between this and either the prev or next genomic position
*
* @param direction PREVIOUS if we want before, NEXT if we want after
* @return a non-null list of cigar elements between this and the neighboring position in direction
*/
@Ensures("result != null")
private LinkedList<CigarElement> getBetween(final Direction direction) {
final int increment = direction == Direction.NEXT ? 1 : -1;
LinkedList<CigarElement> elements = null;
final int nCigarElements = read.getCigarLength();
for ( int i = currentCigarOffset + increment; i >= 0 && i < nCigarElements; i += increment) {
@ -343,15 +453,42 @@ public class PileupElement implements Comparable<PileupElement> {
return elements == null ? EMPTY_LINKED_LIST : elements;
}
/**
* Get the cigar element of the previous genomic aligned position
*
* For example, we might have 1M2I3M, and be sitting at the someone in the 3M. This
* function would return 1M, as the 2I isn't on the genome. Note this function skips
* all of the positions that would occur in the current element. So the result
* is always 1M regardless of whether we're in the first, second, or third position of the 3M
* cigar.
*
* @return a CigarElement, or null (indicating that no previous element exists)
*/
@Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())")
public CigarElement getPreviousOnGenomeCigarElement() {
return getNeighboringOnGenomeCigarElement(-1);
return getNeighboringOnGenomeCigarElement(Direction.PREV);
}
/**
* Get the cigar element of the next genomic aligned position
*
* @see #getPreviousOnGenomeCigarElement() for more details
*
* @return a CigarElement, or null (indicating that no next element exists)
*/
@Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())")
public CigarElement getNextOnGenomeCigarElement() {
return getNeighboringOnGenomeCigarElement(1);
return getNeighboringOnGenomeCigarElement(Direction.NEXT);
}
private CigarElement getNeighboringOnGenomeCigarElement(final int increment) {
/**
* Helper function to get the cigar element of the next or previous genomic position
* @param direction the direction to look in
* @return a CigarElement, or null if no such element exists
*/
@Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())")
private CigarElement getNeighboringOnGenomeCigarElement(final Direction direction) {
final int increment = direction == Direction.NEXT ? 1 : -1;
final int nCigarElements = read.getCigarLength();
for ( int i = currentCigarOffset + increment; i >= 0 && i < nCigarElements; i += increment) {
@ -364,31 +501,97 @@ public class PileupElement implements Comparable<PileupElement> {
return null;
}
/**
* Does the cigar element (which may be null) have operation toMatch?
*
* @param maybeCigarElement a CigarElement that might be null
* @param toMatch a CigarOperator we want to match against the one in maybeCigarElement
* @return true if maybeCigarElement isn't null and has operator toMatch
*/
@Requires("toMatch != null")
private boolean hasOperator(final CigarElement maybeCigarElement, final CigarOperator toMatch) {
return maybeCigarElement != null && maybeCigarElement.getOperator() == toMatch;
}
public boolean isAfterDeletion() { return atStartOfCurrentCigar() && hasOperator(getPreviousOnGenomeCigarElement(), CigarOperator.D); }
public boolean isBeforeDeletion() { return atEndOfCurrentCigar() && hasOperator(getNextOnGenomeCigarElement(), CigarOperator.D); }
/**
* Does an insertion occur immediately before the current position on the genome?
*
* @return true if yes, false if no
*/
public boolean isAfterInsertion() { return isAfter(getBetweenPrevPosition(), CigarOperator.I); }
/**
* Does an insertion occur immediately after the current position on the genome?
*
* @return true if yes, false if no
*/
public boolean isBeforeInsertion() { return isBefore(getBetweenNextPosition(), CigarOperator.I); }
/**
* Does a soft-clipping event occur immediately before the current position on the genome?
*
* @return true if yes, false if no
*/
public boolean isAfterSoftClip() { return isAfter(getBetweenPrevPosition(), CigarOperator.S); }
/**
* Does a soft-clipping event occur immediately after the current position on the genome?
*
* @return true if yes, false if no
*/
public boolean isBeforeSoftClip() { return isBefore(getBetweenNextPosition(), CigarOperator.S); }
/**
* Does a soft-clipping event occur immediately before or after the current position on the genome?
*
* @return true if yes, false if no
*/
public boolean isNextToSoftClip() { return isAfterSoftClip() || isBeforeSoftClip(); }
/**
* Is the current position at the end of the current cigar?
*
* For example, if we are in element 3M, this function returns true if we are at offsetInCurrentCigar
* of 2, but not 0 or 1.
*
* @return true if we're at the end of the current cigar
*/
public boolean atEndOfCurrentCigar() {
return offsetInCurrentCigar == currentCigarElement.getLength() - 1;
}
/**
* Is the current position at the start of the current cigar?
*
* For example, if we are in element 3M, this function returns true if we are at offsetInCurrentCigar
* of 0, but not 1 or 2.
*
* @return true if we're at the start of the current cigar
*/
public boolean atStartOfCurrentCigar() {
return offsetInCurrentCigar == 0;
}
/**
* Is op the last element in the list of elements?
*
* @param elements the elements to examine
* @param op the op we want the last element's op to equal
* @return true if op == last(elements).op
*/
@Requires({"elements != null", "op != null"})
private boolean isAfter(final LinkedList<CigarElement> elements, final CigarOperator op) {
return ! elements.isEmpty() && elements.peekLast().getOperator() == op;
}
/**
* Is op the first element in the list of elements?
*
* @param elements the elements to examine
* @param op the op we want the last element's op to equal
* @return true if op == first(elements).op
*/
@Requires({"elements != null", "op != null"})
private boolean isBefore(final List<CigarElement> elements, final CigarOperator op) {
return ! elements.isEmpty() && elements.get(0).getOperator() == op;
}

View File

@ -297,7 +297,7 @@ public class AlignmentUtils {
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), false, pileupElement.isDeletion(), alignmentStart, refLocus );
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) {

View File

@ -123,24 +123,21 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
protected static class LIBSTest {
public static final int locus = 44367788;
final String cigar;
final String cigarString;
final int readLength;
final private List<CigarElement> elements;
public LIBSTest(final String cigar, final int readLength) {
this(TextCigarCodec.getSingleton().decode(cigar).getCigarElements(), cigar, readLength);
}
public LIBSTest(final List<CigarElement> elements, final String cigar, final int readLength) {
this.elements = elements;
this.cigar = cigar;
this.readLength = readLength;
public LIBSTest(final String cigarString) {
final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString);
this.cigarString = cigarString;
this.elements = cigar.getCigarElements();
this.readLength = cigar.getReadLength();
}
@Override
public String toString() {
return "LIBSTest{" +
"cigar='" + cigar + '\'' +
"cigar='" + cigarString + '\'' +
", readLength=" + readLength +
'}';
}
@ -156,7 +153,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
for ( int i = 0; i < readLength; i++ )
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
read.setBaseQualities(quals);
read.setCigarString(cigar);
read.setCigarString(cigarString);
return read;
}
}
@ -220,7 +217,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
! (last.getOperator() == CigarOperator.I || last.getOperator() == CigarOperator.S))
return null;
return new LIBSTest(elements, cigar, len);
return new LIBSTest(cigar);
}
@DataProvider(name = "LIBSTest")

View File

@ -0,0 +1,191 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.locusiterator.AlignmentStateMachine;
import org.broadinstitute.sting.utils.locusiterator.LIBS_position;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* testing of the new (non-legacy) version of LocusIteratorByState
*/
public class PileupElementUnitTest extends LocusIteratorByStateBaseTest {
@DataProvider(name = "PileupElementTest")
public Object[][] makePileupElementTest() {
// return new Object[][]{{new LIBSTest("2X2D2P2X")}};
// return createLIBSTests(
// Arrays.asList(2),
// Arrays.asList(2));
return createLIBSTests(
Arrays.asList(1, 2),
Arrays.asList(1, 2, 3, 4));
}
@Test(dataProvider = "PileupElementTest")
public void testPileupElementTest(LIBSTest params) {
final GATKSAMRecord read = params.makeRead();
final AlignmentStateMachine state = new AlignmentStateMachine(read);
final LIBS_position tester = new LIBS_position(read);
while ( state.stepForwardOnGenome() != null ) {
tester.stepForwardOnGenome();
final PileupElement pe = state.makePileupElement();
Assert.assertEquals(pe.getRead(), read);
Assert.assertEquals(pe.getMappingQual(), read.getMappingQuality());
Assert.assertEquals(pe.getOffset(), state.getReadOffset());
Assert.assertEquals(pe.isDeletion(), state.getCigarOperator() == CigarOperator.D);
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) {
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
}
Assert.assertEquals(pe.atEndOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == state.getCurrentCigarElement().getLength() - 1, "atEndOfCurrentCigar failed");
Assert.assertEquals(pe.atStartOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == 0, "atStartOfCurrentCigar failed");
Assert.assertEquals(pe.getBase(), pe.isDeletion() ? PileupElement.DELETION_BASE : read.getReadBases()[state.getReadOffset()]);
Assert.assertEquals(pe.getQual(), pe.isDeletion() ? PileupElement.DELETION_QUAL : read.getBaseQualities()[state.getReadOffset()]);
Assert.assertEquals(pe.getCurrentCigarElement(), state.getCurrentCigarElement());
Assert.assertEquals(pe.getCurrentCigarOffset(), state.getCurrentCigarElementOffset());
// tested in libs
//pe.getLengthOfImmediatelyFollowingIndel();
//pe.getBasesOfImmediatelyFollowingInsertion();
// Don't test -- pe.getBaseIndex();
if ( pe.atEndOfCurrentCigar() && state.getCurrentCigarElementOffset() < read.getCigarLength() - 1 ) {
final CigarElement nextElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() + 1);
if ( nextElement.getOperator() == CigarOperator.I ) {
Assert.assertTrue(pe.getBetweenNextPosition().size() >= 1);
Assert.assertEquals(pe.getBetweenNextPosition().get(0), nextElement);
}
if ( nextElement.getOperator() == CigarOperator.M ) {
Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
}
} else {
Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
}
if ( pe.atStartOfCurrentCigar() && state.getCurrentCigarElementOffset() > 0 ) {
final CigarElement prevElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() - 1);
if ( prevElement.getOperator() == CigarOperator.I ) {
Assert.assertTrue(pe.getBetweenPrevPosition().size() >= 1);
Assert.assertEquals(pe.getBetweenPrevPosition().getLast(), prevElement);
}
if ( prevElement.getOperator() == CigarOperator.M ) {
Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
}
} else {
Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
}
// TODO -- add meaningful tests
pe.getBaseInsertionQual();
pe.getBaseDeletionQual();
pe.getRepresentativeCount();
}
}
@DataProvider(name = "PrevAndNextTest")
public Object[][] makePrevAndNextTest() {
final List<Object[]> tests = new LinkedList<Object[]>();
final List<CigarOperator> operators = Arrays.asList(CigarOperator.I, CigarOperator.P, CigarOperator.S);
for ( final CigarOperator firstOp : Arrays.asList(CigarOperator.M) ) {
for ( final CigarOperator lastOp : Arrays.asList(CigarOperator.M, CigarOperator.D) ) {
for ( final int nIntermediate : Arrays.asList(1, 2, 3) ) {
for ( final List<CigarOperator> combination : Utils.makePermutations(operators, nIntermediate, false) ) {
final int readLength = 2 + combination.size();
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
String cigar = "1" + firstOp;
for ( final CigarOperator op : combination ) cigar += "1" + op;
cigar += "1" + lastOp;
read.setCigarString(cigar);
tests.add(new Object[]{read, firstOp, lastOp, combination});
}
}
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "PrevAndNextTest")
public void testPrevAndNextTest(final GATKSAMRecord read, final CigarOperator firstOp, final CigarOperator lastOp, final List<CigarOperator> ops) {
final AlignmentStateMachine state = new AlignmentStateMachine(read);
state.stepForwardOnGenome();
final PileupElement pe = state.makePileupElement();
Assert.assertEquals(pe.getBetweenNextPosition().size(), ops.size());
Assert.assertEquals(pe.getBetweenPrevPosition().size(), 0);
assertEqualsOperators(pe.getBetweenNextPosition(), ops);
Assert.assertEquals(pe.getPreviousOnGenomeCigarElement(), null);
Assert.assertNotNull(pe.getNextOnGenomeCigarElement());
Assert.assertEquals(pe.getNextOnGenomeCigarElement().getOperator(), lastOp);
state.stepForwardOnGenome();
final PileupElement pe2 = state.makePileupElement();
Assert.assertEquals(pe2.getBetweenPrevPosition().size(), ops.size());
Assert.assertEquals(pe2.getBetweenNextPosition().size(), 0);
assertEqualsOperators(pe2.getBetweenPrevPosition(), ops);
Assert.assertNotNull(pe2.getPreviousOnGenomeCigarElement());
Assert.assertEquals(pe2.getPreviousOnGenomeCigarElement().getOperator(), firstOp);
Assert.assertEquals(pe2.getNextOnGenomeCigarElement(), null);
}
private void assertEqualsOperators(final List<CigarElement> elements, final List<CigarOperator> ops) {
for ( int i = 0; i < elements.size(); i++ ) {
Assert.assertEquals(elements.get(i).getOperator(), ops.get(i), "elements doesn't have expected operator at position " + i);
}
}
}