diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 959a26fba..ec107512a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -169,8 +169,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR * @return true if this base is part of a meaningful read for comparison, false otherwise */ public static boolean isUsableBase(final PileupElement p, final boolean allowDeletions) { - return !(p.isInsertionAtBeginningOfRead() || - (! allowDeletions && p.isDeletion()) || + return !((! allowDeletions && p.isDeletion()) || p.getMappingQual() == 0 || p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java index 44502f0aa..aa117eb3b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java @@ -323,22 +323,12 @@ public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLi public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) { final List BAQedElements = new ArrayList(); for( final PileupElement PE : pileup ) { - final PileupElement newPE = new BAQedPileupElement( PE ); + final PileupElement newPE = new SNPGenotypeLikelihoodsCalculationModel.BAQedPileupElement( PE ); BAQedElements.add( newPE ); } return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements ); } - public class BAQedPileupElement extends PileupElement { - public BAQedPileupElement( final PileupElement PE ) { - super(PE); - } - - @Override - public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); } - } - - /** * Helper function that returns the phred-scaled base quality score we should use for calculating * likelihoods for a pileup element. May return 0 to indicate that the observation is bad, and may diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 86000f236..84c109c9d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -252,7 +252,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood protected int getFilteredDepth(ReadBackedPileup pileup) { int count = 0; for (PileupElement p : pileup) { - if (p.isDeletion() || p.isInsertionAtBeginningOfRead() || BaseUtils.isRegularBase(p.getBase())) + if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase())) count += p.getRepresentativeCount(); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 72f8edc3e..7dc3e8ee3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -241,7 +241,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC } @Override - public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); } + public byte getQual() { + if ( isDeletion() ) + return super.getQual(); + else + return BAQ.calcBAQFromTag(getRead(), offset, true); + } } private static class SampleGenotypeData { diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java index 98d438132..4f4c41b08 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java @@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; "currentCigarElementOffset >= -1", "currentCigarElementOffset <= nCigarElements" }) -class AlignmentStateMachine { +public class AlignmentStateMachine { /** * Our read */ diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java index 0c218a36c..f830dcb30 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java @@ -25,6 +25,11 @@ public abstract class LocusIterator implements Iterable, Close public abstract boolean hasNext(); public abstract AlignmentContext next(); + // TODO -- remove me when ART testing is done + public LocusIteratorByState getLIBS() { + return null; + } + public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 08665dfb7..830b09d52 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -47,6 +47,11 @@ import java.util.List; * Time: 8:54:05 AM */ public class PileupElement implements Comparable { + private final static LinkedList EMPTY_LINKED_LIST = new LinkedList(); + + private final static EnumSet ON_GENOME_OPERATORS = + EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D); + public static final byte DELETION_BASE = BaseUtils.D; public static final byte DELETION_QUAL = (byte) 16; public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87; @@ -90,13 +95,34 @@ public class PileupElement implements Comparable { currentCigarOffset = offsetInCurrentCigar = -1; } + /** + * Create a new pileup element + * + * @param read a non-null read to pileup + * @param baseOffset the offset into the read's base / qual vector aligned to this position on the genome. If the + * current cigar element is a deletion, offset should be the offset of the last M/=/X position. + * @param currentElement a non-null CigarElement that indicates the cigar element aligning the read to the genome + * @param currentCigarOffset the offset of currentElement in read.getCigar().getElement(currentCigarOffset) == currentElement) + * @param offsetInCurrentCigar how far into the currentElement are we in our alignment to the genome? + */ public PileupElement(final GATKSAMRecord read, final int baseOffset, - final CigarElement currentElement, final int currentCigarOffset, final int offsetInCurrentCigar) { + final CigarElement currentElement, final int currentCigarOffset, + final int offsetInCurrentCigar) { + assert currentElement != null; + this.read = read; this.offset = baseOffset; this.currentCigarElement = currentElement; this.currentCigarOffset = currentCigarOffset; this.offsetInCurrentCigar = offsetInCurrentCigar; + + // for performance regions these are assertions + assert this.read != null; + assert this.offset >= 0 && this.offset < this.read.getReadLength(); + assert this.currentCigarOffset >= 0; + assert this.currentCigarOffset < read.getCigarLength(); + assert this.offsetInCurrentCigar >= 0; + assert this.offsetInCurrentCigar < currentElement.getLength(); } /** @@ -112,50 +138,100 @@ public class PileupElement implements Comparable { throw new UnsupportedOperationException("please use LocusIteratorByState.createPileupForReadAndOffset instead"); } + /** + * Is this element a deletion w.r.t. the reference genome? + * + * @return true if this is a deletion, false otherwise + */ public boolean isDeletion() { return currentCigarElement.getOperator() == CigarOperator.D; } + /** + * Is the current element immediately before a deletion, but itself not a deletion? + * + * Suppose we are aligning a read with cigar 3M2D1M. This function is true + * if we are in the last cigar position of the 3M, but not if we are in the 2D itself. + * + * @return true if the next alignment position is a deletion w.r.t. the reference genome + */ public boolean isBeforeDeletionStart() { - return isBeforeDeletion() && ! isDeletion(); + return ! isDeletion() && atEndOfCurrentCigar() && hasOperator(getNextOnGenomeCigarElement(), CigarOperator.D); } + /** + * Is the current element immediately after a deletion, but itself not a deletion? + * + * Suppose we are aligning a read with cigar 1M2D3M. This function is true + * if we are in the first cigar position of the 3M, but not if we are in the 2D itself or + * in any but the first position of the 3M. + * + * @return true if the previous alignment position is a deletion w.r.t. the reference genome + */ public boolean isAfterDeletionEnd() { - return isAfterDeletion() && ! isDeletion(); - } - - public boolean isInsertionAtBeginningOfRead() { - return offset == -1; + return ! isDeletion() && atStartOfCurrentCigar() && hasOperator(getPreviousOnGenomeCigarElement(), CigarOperator.D); } + /** + * Get the read for this pileup element + * @return a non-null GATKSAMRecord + */ @Ensures("result != null") public GATKSAMRecord getRead() { return read; } - @Ensures("result == offset") + /** + * Get the offset of the this element into the read that aligns that read's base to this genomic position. + * + * If the current element is a deletion then offset is the offset of the last base containing offset. + * + * @return a valid offset into the read's bases + */ + @Ensures({"result >= 0", "result <= read.getReadLength()"}) public int getOffset() { return offset; } + /** + * Get the base aligned to the genome at this location + * + * If the current element is a deletion returns DELETION_BASE + * + * @return a base encoded as a byte + */ + @Ensures("result != DELETION_BASE || (isDeletion() && result == DELETION_BASE)") public byte getBase() { - return getBase(offset); + return isDeletion() ? DELETION_BASE : read.getReadBases()[offset]; } + @Deprecated public int getBaseIndex() { - return getBaseIndex(offset); + return BaseUtils.simpleBaseToBaseIndex(getBase()); } + /** + * Get the base quality score of the base at this aligned position on the genome + * @return a phred-scaled quality score as a byte + */ public byte getQual() { - return getQual(offset); + return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; } + /** + * Get the Base Insertion quality at this pileup position + * @return a phred-scaled quality score as a byte + */ public byte getBaseInsertionQual() { - return getBaseInsertionQual(offset); + return isDeletion() ? DELETION_QUAL : read.getBaseInsertionQualities()[offset]; } + /** + * Get the Base Deletion quality at this pileup position + * @return a phred-scaled quality score as a byte + */ public byte getBaseDeletionQual() { - return getBaseDeletionQual(offset); + return isDeletion() ? DELETION_QUAL : read.getBaseDeletionQualities()[offset]; } /** @@ -222,6 +298,10 @@ public class PileupElement implements Comparable { return null; } + /** + * Get the mapping quality of the read of this element + * @return the mapping quality of the underlying SAM record + */ public int getMappingQual() { return read.getMappingQuality(); } @@ -231,26 +311,6 @@ public class PileupElement implements Comparable { return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char) getBase(), getQual()); } - protected byte getBase(final int offset) { - return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]; - } - - protected int getBaseIndex(final int offset) { - return BaseUtils.simpleBaseToBaseIndex((isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]); - } - - protected byte getQual(final int offset) { - return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseQualities()[offset]; - } - - protected byte getBaseInsertionQual(final int offset) { - return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseInsertionQualities()[offset]; - } - - protected byte getBaseDeletionQual(final int offset) { - return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseDeletionQualities()[offset]; - } - @Override public int compareTo(final PileupElement pileupElement) { if (offset < pileupElement.offset) @@ -281,44 +341,94 @@ public class PileupElement implements Comparable { * @return */ public int getRepresentativeCount() { - int representativeCount = 1; - - if (read.isReducedRead() && !isInsertionAtBeginningOfRead()) { + if (read.isReducedRead()) { if (isDeletion() && (offset + 1 >= read.getReadLength()) ) // deletion in the end of the read throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString())); - representativeCount = (isDeletion()) ? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0) : read.getReducedCount(offset); + return isDeletion() + ? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0) + : read.getReducedCount(offset); + } else { + return 1; } - return representativeCount; } + /** + * Get the cigar element aligning this element to the genome + * @return a non-null CigarElement + */ + @Ensures("result != null") public CigarElement getCurrentCigarElement() { return currentCigarElement; } + /** + * Get the offset of this cigar element in the Cigar of the current read (0-based) + * + * Suppose the cigar is 1M2D3I4D. If we are in the 1M state this function returns + * 0. If we are in 2D, the result is 1. If we are in the 4D, the result is 3. + * + * @return an offset into the read.getCigar() that brings us to the current cigar element + */ public int getCurrentCigarOffset() { return currentCigarOffset; } + /** + * Get the offset into the *current* cigar element for this alignment position + * + * We can be anywhere from offset 0 (first position) to length - 1 of the current + * cigar element aligning us to this genomic position. + * + * @return a valid offset into the current cigar element + */ + @Ensures({"result >= 0", "result < getCurrentCigarElement().getLength()"}) public int getOffsetInCurrentCigar() { return offsetInCurrentCigar; } + /** + * Get the cigar elements that occur before the current position but after the previous position on the genome + * + * For example, if we are in the 3M state of 1M2I3M state then 2I occurs before this position. + * + * Note that this function does not care where we are in the current cigar element. In the previous + * example this list of elements contains the 2I state regardless of where you are in the 3M. + * + * Note this returns the list of all elements that occur between this and the prev site, so for + * example we might have 5S10I2M and this function would return [5S, 10I]. + * + * @return a non-null list of CigarElements + */ + @Ensures("result != null") public LinkedList getBetweenPrevPosition() { - return atStartOfCurrentCigar() ? getBetween(-1) : EMPTY_LINKED_LIST; + return atStartOfCurrentCigar() ? getBetween(Direction.PREV) : EMPTY_LINKED_LIST; } + /** + * Get the cigar elements that occur after the current position but before the next position on the genome + * + * @see #getBetweenPrevPosition() for more details + * + * @return a non-null list of CigarElements + */ + @Ensures("result != null") public LinkedList getBetweenNextPosition() { - return atEndOfCurrentCigar() ? getBetween(1) : EMPTY_LINKED_LIST; + return atEndOfCurrentCigar() ? getBetween(Direction.NEXT) : EMPTY_LINKED_LIST; } - // TODO -- can I make this unmodifable? - private final static LinkedList EMPTY_LINKED_LIST = new LinkedList(); + /** for some helper functions */ + private enum Direction { PREV, NEXT } - private final static EnumSet ON_GENOME_OPERATORS = - EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D); - - private LinkedList getBetween(final int increment) { + /** + * Helper function to get cigar elements between this and either the prev or next genomic position + * + * @param direction PREVIOUS if we want before, NEXT if we want after + * @return a non-null list of cigar elements between this and the neighboring position in direction + */ + @Ensures("result != null") + private LinkedList getBetween(final Direction direction) { + final int increment = direction == Direction.NEXT ? 1 : -1; LinkedList elements = null; final int nCigarElements = read.getCigarLength(); for ( int i = currentCigarOffset + increment; i >= 0 && i < nCigarElements; i += increment) { @@ -343,15 +453,42 @@ public class PileupElement implements Comparable { return elements == null ? EMPTY_LINKED_LIST : elements; } + /** + * Get the cigar element of the previous genomic aligned position + * + * For example, we might have 1M2I3M, and be sitting at the someone in the 3M. This + * function would return 1M, as the 2I isn't on the genome. Note this function skips + * all of the positions that would occur in the current element. So the result + * is always 1M regardless of whether we're in the first, second, or third position of the 3M + * cigar. + * + * @return a CigarElement, or null (indicating that no previous element exists) + */ + @Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())") public CigarElement getPreviousOnGenomeCigarElement() { - return getNeighboringOnGenomeCigarElement(-1); + return getNeighboringOnGenomeCigarElement(Direction.PREV); } + /** + * Get the cigar element of the next genomic aligned position + * + * @see #getPreviousOnGenomeCigarElement() for more details + * + * @return a CigarElement, or null (indicating that no next element exists) + */ + @Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())") public CigarElement getNextOnGenomeCigarElement() { - return getNeighboringOnGenomeCigarElement(1); + return getNeighboringOnGenomeCigarElement(Direction.NEXT); } - private CigarElement getNeighboringOnGenomeCigarElement(final int increment) { + /** + * Helper function to get the cigar element of the next or previous genomic position + * @param direction the direction to look in + * @return a CigarElement, or null if no such element exists + */ + @Ensures("result == null || ON_GENOME_OPERATORS.contains(result.getOperator())") + private CigarElement getNeighboringOnGenomeCigarElement(final Direction direction) { + final int increment = direction == Direction.NEXT ? 1 : -1; final int nCigarElements = read.getCigarLength(); for ( int i = currentCigarOffset + increment; i >= 0 && i < nCigarElements; i += increment) { @@ -364,31 +501,97 @@ public class PileupElement implements Comparable { return null; } + /** + * Does the cigar element (which may be null) have operation toMatch? + * + * @param maybeCigarElement a CigarElement that might be null + * @param toMatch a CigarOperator we want to match against the one in maybeCigarElement + * @return true if maybeCigarElement isn't null and has operator toMatch + */ + @Requires("toMatch != null") private boolean hasOperator(final CigarElement maybeCigarElement, final CigarOperator toMatch) { return maybeCigarElement != null && maybeCigarElement.getOperator() == toMatch; } - public boolean isAfterDeletion() { return atStartOfCurrentCigar() && hasOperator(getPreviousOnGenomeCigarElement(), CigarOperator.D); } - public boolean isBeforeDeletion() { return atEndOfCurrentCigar() && hasOperator(getNextOnGenomeCigarElement(), CigarOperator.D); } + /** + * Does an insertion occur immediately before the current position on the genome? + * + * @return true if yes, false if no + */ public boolean isAfterInsertion() { return isAfter(getBetweenPrevPosition(), CigarOperator.I); } + + /** + * Does an insertion occur immediately after the current position on the genome? + * + * @return true if yes, false if no + */ public boolean isBeforeInsertion() { return isBefore(getBetweenNextPosition(), CigarOperator.I); } + /** + * Does a soft-clipping event occur immediately before the current position on the genome? + * + * @return true if yes, false if no + */ public boolean isAfterSoftClip() { return isAfter(getBetweenPrevPosition(), CigarOperator.S); } + + /** + * Does a soft-clipping event occur immediately after the current position on the genome? + * + * @return true if yes, false if no + */ public boolean isBeforeSoftClip() { return isBefore(getBetweenNextPosition(), CigarOperator.S); } + + /** + * Does a soft-clipping event occur immediately before or after the current position on the genome? + * + * @return true if yes, false if no + */ public boolean isNextToSoftClip() { return isAfterSoftClip() || isBeforeSoftClip(); } + /** + * Is the current position at the end of the current cigar? + * + * For example, if we are in element 3M, this function returns true if we are at offsetInCurrentCigar + * of 2, but not 0 or 1. + * + * @return true if we're at the end of the current cigar + */ public boolean atEndOfCurrentCigar() { return offsetInCurrentCigar == currentCigarElement.getLength() - 1; } + /** + * Is the current position at the start of the current cigar? + * + * For example, if we are in element 3M, this function returns true if we are at offsetInCurrentCigar + * of 0, but not 1 or 2. + * + * @return true if we're at the start of the current cigar + */ public boolean atStartOfCurrentCigar() { return offsetInCurrentCigar == 0; } + /** + * Is op the last element in the list of elements? + * + * @param elements the elements to examine + * @param op the op we want the last element's op to equal + * @return true if op == last(elements).op + */ + @Requires({"elements != null", "op != null"}) private boolean isAfter(final LinkedList elements, final CigarOperator op) { return ! elements.isEmpty() && elements.peekLast().getOperator() == op; } + /** + * Is op the first element in the list of elements? + * + * @param elements the elements to examine + * @param op the op we want the last element's op to equal + * @return true if op == first(elements).op + */ + @Requires({"elements != null", "op != null"}) private boolean isBefore(final List elements, final CigarOperator op) { return ! elements.isEmpty() && elements.get(0).getOperator() == op; } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index ca48b7327..0907a0239 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -297,7 +297,7 @@ public class AlignmentUtils { } public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { - return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus ); + return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), false, pileupElement.isDeletion(), alignmentStart, refLocus ); } public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) { diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java index a23ea28e6..6445f976f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java @@ -123,24 +123,21 @@ public class LocusIteratorByStateBaseTest extends BaseTest { protected static class LIBSTest { public static final int locus = 44367788; - final String cigar; + final String cigarString; final int readLength; final private List elements; - public LIBSTest(final String cigar, final int readLength) { - this(TextCigarCodec.getSingleton().decode(cigar).getCigarElements(), cigar, readLength); - } - - public LIBSTest(final List elements, final String cigar, final int readLength) { - this.elements = elements; - this.cigar = cigar; - this.readLength = readLength; + public LIBSTest(final String cigarString) { + final Cigar cigar = TextCigarCodec.getSingleton().decode(cigarString); + this.cigarString = cigarString; + this.elements = cigar.getCigarElements(); + this.readLength = cigar.getReadLength(); } @Override public String toString() { return "LIBSTest{" + - "cigar='" + cigar + '\'' + + "cigar='" + cigarString + '\'' + ", readLength=" + readLength + '}'; } @@ -156,7 +153,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest { for ( int i = 0; i < readLength; i++ ) quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); read.setBaseQualities(quals); - read.setCigarString(cigar); + read.setCigarString(cigarString); return read; } } @@ -220,7 +217,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest { ! (last.getOperator() == CigarOperator.I || last.getOperator() == CigarOperator.S)) return null; - return new LIBSTest(elements, cigar, len); + return new LIBSTest(cigar); } @DataProvider(name = "LIBSTest") diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java new file mode 100644 index 000000000..a760833f5 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.pileup; + +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.locusiterator.AlignmentStateMachine; +import org.broadinstitute.sting.utils.locusiterator.LIBS_position; +import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * testing of the new (non-legacy) version of LocusIteratorByState + */ +public class PileupElementUnitTest extends LocusIteratorByStateBaseTest { + @DataProvider(name = "PileupElementTest") + public Object[][] makePileupElementTest() { +// return new Object[][]{{new LIBSTest("2X2D2P2X")}}; +// return createLIBSTests( +// Arrays.asList(2), +// Arrays.asList(2)); + return createLIBSTests( + Arrays.asList(1, 2), + Arrays.asList(1, 2, 3, 4)); + } + + @Test(dataProvider = "PileupElementTest") + public void testPileupElementTest(LIBSTest params) { + final GATKSAMRecord read = params.makeRead(); + final AlignmentStateMachine state = new AlignmentStateMachine(read); + final LIBS_position tester = new LIBS_position(read); + + while ( state.stepForwardOnGenome() != null ) { + tester.stepForwardOnGenome(); + final PileupElement pe = state.makePileupElement(); + + Assert.assertEquals(pe.getRead(), read); + Assert.assertEquals(pe.getMappingQual(), read.getMappingQuality()); + Assert.assertEquals(pe.getOffset(), state.getReadOffset()); + + Assert.assertEquals(pe.isDeletion(), state.getCigarOperator() == CigarOperator.D); + Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion); + Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion); + Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip); + + if ( ! hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset()) ) { + Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd); + Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart); + } + + + + Assert.assertEquals(pe.atEndOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == state.getCurrentCigarElement().getLength() - 1, "atEndOfCurrentCigar failed"); + Assert.assertEquals(pe.atStartOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == 0, "atStartOfCurrentCigar failed"); + + Assert.assertEquals(pe.getBase(), pe.isDeletion() ? PileupElement.DELETION_BASE : read.getReadBases()[state.getReadOffset()]); + Assert.assertEquals(pe.getQual(), pe.isDeletion() ? PileupElement.DELETION_QUAL : read.getBaseQualities()[state.getReadOffset()]); + + Assert.assertEquals(pe.getCurrentCigarElement(), state.getCurrentCigarElement()); + Assert.assertEquals(pe.getCurrentCigarOffset(), state.getCurrentCigarElementOffset()); + + // tested in libs + //pe.getLengthOfImmediatelyFollowingIndel(); + //pe.getBasesOfImmediatelyFollowingInsertion(); + + // Don't test -- pe.getBaseIndex(); + if ( pe.atEndOfCurrentCigar() && state.getCurrentCigarElementOffset() < read.getCigarLength() - 1 ) { + final CigarElement nextElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() + 1); + if ( nextElement.getOperator() == CigarOperator.I ) { + Assert.assertTrue(pe.getBetweenNextPosition().size() >= 1); + Assert.assertEquals(pe.getBetweenNextPosition().get(0), nextElement); + } + if ( nextElement.getOperator() == CigarOperator.M ) { + Assert.assertTrue(pe.getBetweenNextPosition().isEmpty()); + } + } else { + Assert.assertTrue(pe.getBetweenNextPosition().isEmpty()); + } + + if ( pe.atStartOfCurrentCigar() && state.getCurrentCigarElementOffset() > 0 ) { + final CigarElement prevElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() - 1); + if ( prevElement.getOperator() == CigarOperator.I ) { + Assert.assertTrue(pe.getBetweenPrevPosition().size() >= 1); + Assert.assertEquals(pe.getBetweenPrevPosition().getLast(), prevElement); + } + if ( prevElement.getOperator() == CigarOperator.M ) { + Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty()); + } + } else { + Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty()); + } + + // TODO -- add meaningful tests + pe.getBaseInsertionQual(); + pe.getBaseDeletionQual(); + pe.getRepresentativeCount(); + } + } + + + @DataProvider(name = "PrevAndNextTest") + public Object[][] makePrevAndNextTest() { + final List tests = new LinkedList(); + + final List operators = Arrays.asList(CigarOperator.I, CigarOperator.P, CigarOperator.S); + + for ( final CigarOperator firstOp : Arrays.asList(CigarOperator.M) ) { + for ( final CigarOperator lastOp : Arrays.asList(CigarOperator.M, CigarOperator.D) ) { + for ( final int nIntermediate : Arrays.asList(1, 2, 3) ) { + for ( final List combination : Utils.makePermutations(operators, nIntermediate, false) ) { + final int readLength = 2 + combination.size(); + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength); + read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); + read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); + + String cigar = "1" + firstOp; + for ( final CigarOperator op : combination ) cigar += "1" + op; + cigar += "1" + lastOp; + read.setCigarString(cigar); + + tests.add(new Object[]{read, firstOp, lastOp, combination}); + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "PrevAndNextTest") + public void testPrevAndNextTest(final GATKSAMRecord read, final CigarOperator firstOp, final CigarOperator lastOp, final List ops) { + final AlignmentStateMachine state = new AlignmentStateMachine(read); + + state.stepForwardOnGenome(); + final PileupElement pe = state.makePileupElement(); + Assert.assertEquals(pe.getBetweenNextPosition().size(), ops.size()); + Assert.assertEquals(pe.getBetweenPrevPosition().size(), 0); + assertEqualsOperators(pe.getBetweenNextPosition(), ops); + Assert.assertEquals(pe.getPreviousOnGenomeCigarElement(), null); + Assert.assertNotNull(pe.getNextOnGenomeCigarElement()); + Assert.assertEquals(pe.getNextOnGenomeCigarElement().getOperator(), lastOp); + + state.stepForwardOnGenome(); + final PileupElement pe2 = state.makePileupElement(); + Assert.assertEquals(pe2.getBetweenPrevPosition().size(), ops.size()); + Assert.assertEquals(pe2.getBetweenNextPosition().size(), 0); + assertEqualsOperators(pe2.getBetweenPrevPosition(), ops); + Assert.assertNotNull(pe2.getPreviousOnGenomeCigarElement()); + Assert.assertEquals(pe2.getPreviousOnGenomeCigarElement().getOperator(), firstOp); + Assert.assertEquals(pe2.getNextOnGenomeCigarElement(), null); + } + + private void assertEqualsOperators(final List elements, final List ops) { + for ( int i = 0; i < elements.size(); i++ ) { + Assert.assertEquals(elements.get(i).getOperator(), ops.get(i), "elements doesn't have expected operator at position " + i); + } + } +}