diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentState.java new file mode 100644 index 000000000..38caaa006 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentState.java @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.locusiterator; + +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.LinkedList; +import java.util.List; + +public final class AlignmentState { + /** + * Our read + */ + private final SAMRecord read; + + /** + * how far are we offset from the start of the read bases? + */ + private final int readOffset; + + /** + * how far are we offset from the alignment start on the genome? + */ + private final int genomeOffset; + + /** + * Our cigar element + */ + private final CigarElement cigarElement; + + /** + * how far are we into our cigarElement? + */ + private final int cigarElementCounter; + + private LinkedList betweenPrevPosition = null, betweenNextPosition = null; + private AlignmentState prev = null, next = null; + + public static AlignmentState makeInternalNode(final SAMRecord read, int readOffset, + int genomeOffset, CigarElement cigarElement, + int cigarElementCounter, final LinkedList betweenPrevAndThis) { + final AlignmentState state = new AlignmentState(read, readOffset, genomeOffset, cigarElement, cigarElementCounter); + state.setBetweenPrevPosition(betweenPrevAndThis); + return state; + } + + public static AlignmentState makeLeftEdge(final SAMRecord read) { + return new AlignmentState(read, -1, 1, null, -1); + } + + public static AlignmentState makeRightEdge(final SAMRecord read, final AlignmentState current, final LinkedList betweenCurrentAndThis) { + final AlignmentState state = new AlignmentState(read, -1, 1, null, -1); + state.setPrev(current); + state.setBetweenPrevPosition(betweenCurrentAndThis); + return state; + } + + protected AlignmentState(SAMRecord read, int readOffset, int genomeOffset, CigarElement cigarElement, int cigarElementCounter) { + this.read = read; + this.readOffset = readOffset; + this.genomeOffset = genomeOffset; + this.cigarElement = cigarElement; + this.cigarElementCounter = cigarElementCounter; + } + + /** + * Is this an edge state? I.e., one that is before or after the current read? + * @return true if this state is an edge state, false otherwise + */ + public boolean isEdge() { + return readOffset == -1; + } + + public SAMRecord getRead() { + return read; + } + + /** + * What is our current offset in the read's bases that aligns us with the reference genome? + * + * @return the current read offset position + */ + public int getReadOffset() { + return readOffset; + } + + /** + * What is the current offset w.r.t. the alignment state that aligns us to the readOffset? + * + * @return the current offset + */ + public int getGenomeOffset() { + return genomeOffset; + } + + public int getGenomePosition() { + return read.getAlignmentStart() + getGenomeOffset(); + } + + public GenomeLoc getLocation(final GenomeLocParser genomeLocParser) { + return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); + } + + public AlignmentState getPrev() { + return prev; + } + + public AlignmentState getNext() { + return next; + } + + public boolean hasPrev() { return prev != null; } + public boolean hasNext() { return next != null; } + public boolean prevIsEdge() { return hasPrev() && getPrev().isEdge(); } + public boolean nextIsEdge() { return hasNext() && getNext().isEdge(); } + + public CigarElement getCigarElement() { + return cigarElement; + } + + /** + * + * @return null if this is an edge state + */ + public CigarOperator getCigarOperator() { + return cigarElement == null ? null : cigarElement.getOperator(); + } + + public String toString() { + return String.format("%s ro=%d go=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarElementCounter, cigarElement); + } + + public int getCigarElementCounter() { + return cigarElementCounter; + } + + // ----------------------------------------------------------------------------------------------- + // Code for setting up prev / next states + // + // TODO -- should these functions all be protected? + // + // ----------------------------------------------------------------------------------------------- + + public void setBetweenPrevPosition(LinkedList betweenPrevPosition) { + this.betweenPrevPosition = betweenPrevPosition; + } + + public void setBetweenNextPosition(LinkedList betweenNextPosition) { + this.betweenNextPosition = betweenNextPosition; + } + + public LinkedList getBetweenPrevPosition() { + return betweenPrevPosition; + } + + public LinkedList getBetweenNextPosition() { + return betweenNextPosition; + } + + public void setPrev(AlignmentState prev) { + this.prev = prev; + } + + public void setNext(AlignmentState next) { + this.next = next; + } + + // ----------------------------------------------------------------------------------------------- + // Code for computing presence / absence of states in the prev / current / next + // ----------------------------------------------------------------------------------------------- + + public boolean isAfterDeletion() { return testOperator(getPrev(), CigarOperator.D); } + public boolean isBeforeDeletion() { return testOperator(getNext(), CigarOperator.D); } + public boolean isAfterInsertion() { return isAfter(getBetweenPrevPosition(), CigarOperator.I); } + public boolean isBeforeInsertion() { return isBefore(getBetweenNextPosition(), CigarOperator.I); } + + public boolean isAfterSoftClip() { return isAfter(getBetweenPrevPosition(), CigarOperator.S); } + public boolean isBeforeSoftClip() { return isBefore(getBetweenNextPosition(), CigarOperator.S); } + public boolean isNextToSoftClip() { return isAfterSoftClip() || isBeforeSoftClip(); } + + private boolean testOperator(final AlignmentState state, final CigarOperator op) { + return state != null && state.getCigarOperator() == op; + } + + private boolean isAfter(final LinkedList elements, final CigarOperator op) { + return ! elements.isEmpty() && elements.peekLast().getOperator() == op; + } + + private boolean isBefore(final List elements, final CigarOperator op) { + return ! elements.isEmpty() && elements.get(0).getOperator() == op; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java new file mode 100644 index 000000000..0d4d29294 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.locusiterator; + +import com.google.java.contract.Requires; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.util.LinkedList; +import java.util.List; + +/** + * Steps a single read along its alignment to the genome + * + * The logical model for generating extended events is as follows: the "record state" + * implements the traversal along the reference; thus stepForwardOnGenome() returns + * on every and only on actual reference bases. This can be a (mis)match or a deletion + * (in the latter case, we still return on every individual reference base the deletion spans). + * In the extended events mode, the record state also remembers if there was an insertion, or + * if the deletion just started *right before* the current reference base the record state is + * pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome() + * will clear that memory (as we remember only extended events immediately preceding + * the current reference base). + * + * User: depristo + * Date: 1/5/13 + * Time: 1:08 PM + */ +class AlignmentStateMachine { + // TODO -- optimizations + // TODO -- only keep 3 States, and recycle the prev state to become the next state + + /** + * Our read + */ + private final SAMRecord read; + private final Cigar cigar; + private final int nCigarElements; + int cigarOffset = -1; + + AlignmentState prev = null, current = null, next = null; + + @Requires("read != null") + // TODO -- should enforce contracts like the read is aligned, etc + public AlignmentStateMachine(final SAMRecord read) { + this.read = read; + this.cigar = read.getCigar(); + this.nCigarElements = cigar.numCigarElements(); + this.prev = AlignmentState.makeLeftEdge(read); + } + + public SAMRecord getRead() { + return read; + } + + public AlignmentState getPrev() { + return prev; + } + + public AlignmentState getCurrent() { + return current; + } + + public AlignmentState getNext() { + return next; + } + + @Deprecated + public CigarElement peekForwardOnGenome() { + return null; + } + + @Deprecated + public CigarElement peekBackwardOnGenome() { + return null; + } + + public CigarOperator stepForwardOnGenome() { + if ( current == null ) { + // start processing from the edge by updating current to be prev + current = this.prev; + current = nextAlignmentState(); + } else { + // otherwise prev is current, and current is next + prev = current; + current = next; + } + + // if the current pointer isn't the edge, update next + if ( ! current.isEdge() ) + next = nextAlignmentState(); + else + next = null; + + finalizeStates(); + + // todo -- cleanup historical interface + return current.isEdge() ? null : current.getCigarOperator(); + } + + private void finalizeStates() { + // note the order of updates on the betweens. Next has info, and then current does, so + // the update order is next updates current, and current update prev + + if ( next != null ) { + // next can be null because current is the edge + assert ! current.isEdge(); + + next.setPrev(current); + + // Next holds the info about what happened between + // current and next, so we propagate it to current + current.setBetweenNextPosition(next.getBetweenPrevPosition()); + } + + // TODO -- prev setting to current is not necessary (except in creating the left edge) + prev.setNext(current); + prev.setBetweenNextPosition(current.getBetweenPrevPosition()); + + // current just needs to set prev and next + current.setPrev(prev); + current.setNext(next); + + } + + private AlignmentState nextAlignmentState() { + int cigarElementCounter = getCurrent().getCigarElementCounter(); + CigarElement curElement = getCurrent().getCigarElement(); + int genomeOffset = getCurrent().getGenomeOffset(); + int readOffset = getCurrent().getReadOffset(); + + // todo -- optimization: could keep null and allocate lazy since most of the time the between is empty + final LinkedList betweenCurrentAndNext = new LinkedList(); + + boolean done = false; + while ( ! done ) { + // we enter this method with readOffset = index of the last processed base on the read + // (-1 if we did not process a single base yet); this can be last matching base, + // or last base of an insertion + if (curElement == null || ++cigarElementCounter > curElement.getLength()) { + cigarOffset++; + if (cigarOffset < nCigarElements) { + curElement = cigar.getCigarElement(cigarOffset); + cigarElementCounter = 0; + // next line: guards against cigar elements of length 0; when new cigar element is retrieved, + // we reenter in order to re-check cigarElementCounter against curElement's length + } else { + if (curElement != null && curElement.getOperator() == CigarOperator.D) + throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); + return AlignmentState.makeRightEdge(read, getCurrent(), betweenCurrentAndNext); + } + + // in either case we continue the loop + continue; + } + + switch (curElement.getOperator()) { + case H: // ignore hard clips + case P: // ignore pads + cigarElementCounter = curElement.getLength(); + betweenCurrentAndNext.add(curElement); + break; + case I: // insertion w.r.t. the reference + case S: // soft clip + cigarElementCounter = curElement.getLength(); + readOffset += curElement.getLength(); + betweenCurrentAndNext.add(curElement); + break; + case D: // deletion w.r.t. the reference + if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string + throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); + // should be the same as N case + genomeOffset++; + done = true; + break; + case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning) + genomeOffset++; + done = true; + break; + case M: + case EQ: + case X: + readOffset++; + genomeOffset++; + done = true; + break; + default: + throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); + } + } + + return AlignmentState.makeInternalNode(read, readOffset, genomeOffset, curElement, cigarElementCounter, betweenCurrentAndNext); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java new file mode 100644 index 000000000..f4abe2507 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.locusiterator; + +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.List; + +/** + * testing of the new (non-legacy) version of LocusIteratorByState + */ +public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest { + @DataProvider(name = "AlignmentStateMachineTest") + public Object[][] makeAlignmentStateMachineTest() { +// return new Object[][]{{new LIBSTest("2X2D2P2X", 1)}}; +// return createLIBSTests( +// Arrays.asList(1, 2), +// Arrays.asList(5)); + return createLIBSTests( + Arrays.asList(1, 2), + Arrays.asList(1, 2, 3, 4)); + } + + @Test(dataProvider = "AlignmentStateMachineTest") + public void testAlignmentStateMachineTest(LIBSTest params) { + final GATKSAMRecord read = params.makeRead(); + final AlignmentStateMachine stateMachine = new AlignmentStateMachine(read); + final LIBS_position tester = new LIBS_position(read); + + // min is one because always visit something, even for 10I reads + final int expectedBpToVisit = read.getAlignmentEnd() - read.getAlignmentStart() + 1; + + Assert.assertSame(stateMachine.getRead(), read); + Assert.assertNotNull(stateMachine.toString()); + + int bpVisited = 0; + int lastOffset = -1; + + // TODO -- test state machine state before first step? + + while ( stateMachine.stepForwardOnGenome() != null ) { + tester.stepForwardOnGenome(); + final AlignmentState state = stateMachine.getCurrent(); + + Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset()); + Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited); + + if ( bpVisited == 0 ) { + Assert.assertTrue(state.getPrev().isEdge()); + Assert.assertTrue(state.prevIsEdge()); + } + + if ( bpVisited == expectedBpToVisit ) { + Assert.assertTrue(state.hasNext()); + Assert.assertTrue(state.nextIsEdge()); + } + + if ( ! state.nextIsEdge() ) + Assert.assertSame(state.getNext().getPrev(), state); + + testSequencialStatesAreConsistent(state.getPrev(), state); + testSequencialStatesAreConsistent(state, state.getNext()); + + if ( ! workAroundOpsBetweenDeletion(state.getBetweenPrevPosition())) + Assert.assertEquals(state.isAfterDeletion(), tester.isAfterDeletedBase, "fails after deletion"); + if ( ! workAroundOpsBetweenDeletion(state.getBetweenNextPosition())) + Assert.assertEquals(state.isBeforeDeletion(), tester.isBeforeDeletedBase, "fails before deletion"); + Assert.assertEquals(state.isAfterInsertion(), tester.isAfterInsertion, "fails after insertion"); + Assert.assertEquals(state.isBeforeInsertion(), tester.isBeforeInsertion, "Fails before insertion"); + Assert.assertEquals(state.isNextToSoftClip(), tester.isNextToSoftClip, "Fails soft clip test"); + + // TODO -- fixme + //Assert.assertEquals(state.getCigarElementCounter(), tester.currentOperatorIndex, "CigarElement indice failure"); + + // TODO -- state.getGenomeOffset(); + // TODO -- state.getGenomePosition(); + // TODO -- Assert.assertEquals(state.getLocation(genomeLocParser), EXPECTATION); + + lastOffset = state.getReadOffset(); + bpVisited++; + } + + Assert.assertTrue(stateMachine.getCurrent().isEdge()); + Assert.assertFalse(stateMachine.getCurrent().hasNext()); + Assert.assertEquals(stateMachine.getCurrent().getNext(), null); + + Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); + } + + /** + * Work around inadequate tests that aren't worth fixing. + * + * Look at the CIGAR 2M2P2D2P2M. Both M states border a deletion, separated by P (padding elements). So + * the right answer for deletions here is true for isBeforeDeletion() and isAfterDeletion() for the first + * and second M. But the LIBS_position doesn't say so. + * + * @param elements + * @return + */ + private boolean workAroundOpsBetweenDeletion(final List elements) { + for ( final CigarElement elt : elements ) + if ( elt.getOperator() == CigarOperator.P || elt.getOperator() == CigarOperator.H || elt.getOperator() == CigarOperator.S ) + return true; + return false; + } + + private void testSequencialStatesAreConsistent(final AlignmentState left, final AlignmentState right) { + Assert.assertSame(left.getNext(), right); + Assert.assertSame(right.getPrev(), left); + Assert.assertSame(left.getBetweenNextPosition(), right.getBetweenPrevPosition()); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java new file mode 100644 index 000000000..0eb836caf --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.locusiterator; + +import com.google.caliper.Param; +import com.google.caliper.SimpleBenchmark; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Caliper microbenchmark of fragment pileup + */ +public class LocusIteratorBenchmark extends SimpleBenchmark { + protected SAMFileHeader header; + protected GenomeLocParser genomeLocParser; + + List reads = new LinkedList(); + final int readLength = 101; + final int nReads = 10000; + final int locus = 1; + + @Param({"101M", "50M10I40M", "50M10D40M"}) + String cigar; // set automatically by framework + + @Override protected void setUp() { + header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + + for ( int j = 0; j < nReads; j++ ) { + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength); + read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); + final byte[] quals = new byte[readLength]; + for ( int i = 0; i < readLength; i++ ) + quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); + read.setBaseQualities(quals); + read.setCigarString(cigar); + reads.add(read); + } + } + + public void timeOriginalLIBS(int rep) { + for ( int i = 0; i < rep; i++ ) { + final LocusIteratorByState libs = + new LocusIteratorByState( + new LocusIteratorByStateBaseTest.FakeCloseableIterator(reads.iterator()), + LocusIteratorByStateBaseTest.createTestReadProperties(), + genomeLocParser, + LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups()); + + while ( libs.hasNext() ) { + AlignmentContext context = libs.next(); + } + } + } + + public void timeOriginalLIBSStateMachine(int rep) { + for ( int i = 0; i < rep; i++ ) { + for ( final SAMRecord read : reads ) { + final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read); + while ( alignmentStateMachine.stepForwardOnGenome() != null ) { + alignmentStateMachine.getGenomeOffset(); + } + } + } + } + + public void timeAlignmentStateMachine(int rep) { + for ( int i = 0; i < rep; i++ ) { + for ( final SAMRecord read : reads ) { + final AlignmentStateMachine alignmentStateMachine = new AlignmentStateMachine(read); + while ( alignmentStateMachine.stepForwardOnGenome() != null ) { + alignmentStateMachine.getCurrent(); + } + } + } + } + + public static void main(String[] args) { + com.google.caliper.Runner.main(LocusIteratorBenchmark.class, args); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java index 448b3489e..38c715a77 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java @@ -67,7 +67,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest { * For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list * for the system. */ - protected static List sampleListForSAMWithoutReadGroups() { + public static List sampleListForSAMWithoutReadGroups() { List samples = new ArrayList(); samples.add(null); return samples; @@ -81,11 +81,11 @@ public class LocusIteratorByStateBaseTest extends BaseTest { sampleListForSAMWithoutReadGroups()); } - protected static ReadProperties createTestReadProperties() { + public static ReadProperties createTestReadProperties() { return createTestReadProperties(null, false); } - protected static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod, final boolean keepReads ) { + public static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod, final boolean keepReads ) { return new ReadProperties( Collections.emptyList(), new SAMFileHeader(), @@ -222,7 +222,8 @@ public class LocusIteratorByStateBaseTest extends BaseTest { hasMatch = hasMatch || ce.getOperator() == CigarOperator.M; } - if ( ! hasMatch ) + if ( ! hasMatch && elements.size() == 1 && + ! (last.getOperator() == CigarOperator.I || last.getOperator() == CigarOperator.S)) return null; return new LIBSTest(elements, cigar, len);