diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 7c81f878c..fe0488846 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -113,6 +113,7 @@ public class WindowMaker implements Iterable, I // Use the legacy version of LocusIteratorByState if legacy downsampling was requested: libs = ! sourceInfo.getDownsamplingMethod().useLegacyDownsampler ? new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames) : null; this.sourceIterator = sourceInfo.getDownsamplingMethod().useLegacyDownsampler + // TODO -- remove me when we collapse legacy engine fork ? new PeekableIterator(new LegacyLocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames)) : new PeekableIterator(libs); @@ -120,7 +121,7 @@ public class WindowMaker implements Iterable, I } public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List intervals ) { - this(shard, genomeLocParser, iterator, intervals, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups()); + this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); } public Iterator iterator() { diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java index fe769bead..e3eacd56a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java @@ -403,4 +403,14 @@ public class LocusIteratorByState extends LocusIterator { throw new IllegalStateException("Tried to create a pileup for read " + read + " with offset " + offset + " but we never saw such an offset in the alignment state machine"); } + + /** + * For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list + * for the system. + */ + public static List sampleListForSAMWithoutReadGroups() { + List samples = new ArrayList(); + samples.add(null); + return samples; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByState.java deleted file mode 100755 index 09ba8f229..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByState.java +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.locusiterator.old; - -import com.google.java.contract.Ensures; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.downsampling.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo; -import org.broadinstitute.sting.utils.locusiterator.LocusIterator; -import org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; - -import java.util.*; - -/** - * Iterator that traverses a SAM File, accumulating information on a per-locus basis - */ -public class LocusIteratorByState extends LocusIterator { - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class); - - // ----------------------------------------------------------------------------------------------------------------- - // - // member fields - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Used to create new GenomeLocs. - */ - private final GenomeLocParser genomeLocParser; - private final ArrayList samples; - private final ReadStateManager readStates; - private final boolean includeReadsWithDeletionAtLoci; - - private AlignmentContext nextAlignmentContext; - - // ----------------------------------------------------------------------------------------------------------------- - // - // constructors and other basic operations - // - // ----------------------------------------------------------------------------------------------------------------- - - public LocusIteratorByState(final Iterator samIterator, - final ReadProperties readInformation, - final GenomeLocParser genomeLocParser, - final Collection samples) { - this(samIterator, - toDownsamplingInfo(readInformation), - readInformation.includeReadsWithDeletionAtLoci(), - genomeLocParser, - samples, - readInformation.keepUniqueReadListInLIBS()); - } - - protected LocusIteratorByState(final Iterator samIterator, - final LIBSDownsamplingInfo downsamplingInfo, - final boolean includeReadsWithDeletionAtLoci, - final GenomeLocParser genomeLocParser, - final Collection samples, - final boolean maintainUniqueReadsList ) { - this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; - this.genomeLocParser = genomeLocParser; - this.samples = new ArrayList(samples); - this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList); - - // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when - // there's no read data. So we need to throw this error only when samIterator.hasNext() is true - if (this.samples.isEmpty() && samIterator.hasNext()) { - throw new IllegalArgumentException("samples list must not be empty"); - } - } - - @Override - public Iterator iterator() { - return this; - } - - @Override - public void close() { - } - - @Override - public boolean hasNext() { - lazyLoadNextAlignmentContext(); - return nextAlignmentContext != null; - } - - private GenomeLoc getLocation() { - return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser); - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // next() routine and associated collection operations - // - // ----------------------------------------------------------------------------------------------------------------- - - @Override - public AlignmentContext next() { - lazyLoadNextAlignmentContext(); - if (!hasNext()) - throw new NoSuchElementException("LocusIteratorByState: out of elements."); - AlignmentContext currentAlignmentContext = nextAlignmentContext; - nextAlignmentContext = null; - return currentAlignmentContext; - } - - /** - * Creates the next alignment context from the given state. Note that this is implemented as a lazy load method. - * nextAlignmentContext MUST BE null in order for this method to advance to the next entry. - */ - private void lazyLoadNextAlignmentContext() { - while (nextAlignmentContext == null && readStates.hasNext()) { - readStates.collectPendingReads(); - - final GenomeLoc location = getLocation(); - final Map fullPileup = new HashMap(); - - // TODO: How can you determine here whether the current pileup has been downsampled? - boolean hasBeenSampled = false; - - for (final String sample : samples) { - final Iterator iterator = readStates.iterator(sample); - final List pile = new ArrayList(readStates.size(sample)); - - int size = 0; // number of elements in this sample's pileup - int nDeletions = 0; // number of deletions in this sample's pileup - int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0) - - while (iterator.hasNext()) { - final SAMRecordAlignmentState state = iterator.next(); // state object with the read/offset information - final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read - final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator - final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element - final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element - final boolean isSingleElementCigar = nextElement == lastElement; - final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator - final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator - int readOffset = state.getReadOffset(); // the base offset on this read - - final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION; - final boolean isAfterDeletion = lastOp == CigarOperator.DELETION; - final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION; - final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION && !isSingleElementCigar; - final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()); - - int nextElementLength = nextElement.getLength(); - - if (op == CigarOperator.N) // N's are never added to any pileup - continue; - - if (op == CigarOperator.D) { - // TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix - if (includeReadsWithDeletionAtLoci) { // only add deletions to the pileup if we are authorized to do so - pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1)); - size++; - nDeletions++; - if (read.getMappingQuality() == 0) - nMQ0Reads++; - } - } - else { - if (!filterBaseInRead(read, location.getStart())) { - String insertedBaseString = null; - if (nextOp == CigarOperator.I) { - final int insertionOffset = isSingleElementCigar ? 0 : 1; - // TODO -- someone please implement a better fix for the single element insertion CIGAR! - if (isSingleElementCigar) - readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases! - insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength())); - } - - pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength)); - size++; - if (read.getMappingQuality() == 0) - nMQ0Reads++; - } - } - } - - if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup - fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads)); - } - - updateReadStates(); // critical - must be called after we get the current state offsets and location - if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done - nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled); - } - } - - private void updateReadStates() { - for (final String sample : samples) { - Iterator it = readStates.iterator(sample); - while (it.hasNext()) { - SAMRecordAlignmentState state = it.next(); - CigarOperator op = state.stepForwardOnGenome(); - if (op == null) { - // we discard the read only when we are past its end AND indel at the end of the read (if any) was - // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe - // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. - it.remove(); // we've stepped off the end of the object - } - } - } - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // getting the list of reads - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Transfer current list of all unique reads that have ever been used in any pileup, clearing old list - * - * This list is guaranteed to only contain unique reads, even across calls to the this function. It is - * literally the unique set of reads ever seen. - * - * The list occurs in the same order as they are encountered in the underlying iterator. - * - * Takes the maintained list of submitted reads, and transfers it to the caller of this - * function. The old list of set to a new, cleanly allocated list so the caller officially - * owns the list returned by this call. This is the only way to clear the tracking - * of submitted reads, if enabled. - * - * The purpose of this function is allow users of LIBS to keep track of all of the reads pulled off the - * underlying SAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for - * any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads - * used across all pileups. This is necessary for LIBS to handle because attempting to do - * so from the pileups coming out of LIBS is extremely expensive. - * - * This functionality is only available if LIBS was created with the argument to track the reads - * - * @throws UnsupportedOperationException if called when keepingSubmittedReads is false - * - * @return the current list - */ - @Ensures("result != null") - public List transferReadsFromAllPreviousPileups() { - return readStates.transferSubmittedReads(); - } - - /** - * Get the underlying list of tracked reads. For testing only - * @return a non-null list - */ - @Ensures("result != null") - protected List getReadsFromAllPreviousPileups() { - return readStates.getSubmittedReads(); - } - - // ----------------------------------------------------------------------------------------------------------------- - // - // utility functions - // - // ----------------------------------------------------------------------------------------------------------------- - - /** - * Generic place to put per-base filters appropriate to LocusIteratorByState - * - * @param rec - * @param pos - * @return - */ - private boolean filterBaseInRead(GATKSAMRecord rec, long pos) { - return ReadUtils.isBaseInsideAdaptor(rec, pos); - } - - /** - * Create a LIBSDownsamplingInfo object from the requested info in ReadProperties - * - * LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're - * downsampling to coverage by sample. SAMDataSource will have refrained from applying - * any downsamplers to the read stream in this case, in the expectation that LIBS will - * manage the downsampling. The reason for this is twofold: performance (don't have to - * split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling - * of reads (eg., using half of a read, and throwing the rest away). - * - * @param readInfo GATK engine information about what should be done to the reads - * @return a LIBS specific info holder about downsampling only - */ - private static LIBSDownsamplingInfo toDownsamplingInfo(final ReadProperties readInfo) { - final boolean performDownsampling = readInfo.getDownsamplingMethod() != null && - readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE && - readInfo.getDownsamplingMethod().toCoverage != null; - final int coverage = performDownsampling ? readInfo.getDownsamplingMethod().toCoverage : 0; - - return new LIBSDownsamplingInfo(performDownsampling, coverage); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/ReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/ReadStateManager.java deleted file mode 100644 index 322bab0ee..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/ReadStateManager.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.locusiterator.old; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import net.sf.picard.util.PeekableIterator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.downsampling.Downsampler; -import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler; -import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo; - -import java.util.*; - -/** - * Manages and updates mapping from sample -> List of SAMRecordAlignmentState - * - * Optionally can keep track of all of the reads pulled off the iterator and - * that appeared at any point in the list of SAMRecordAlignmentState for any reads. - * This functionaly is only possible at this stage, as this object does the popping of - * reads off the underlying source iterator, and presents only a pileup-like interface - * of samples -> SAMRecordAlignmentStates. Reconstructing the unique set of reads - * used across all pileups is extremely expensive from that data structure. - * - * User: depristo - * Date: 1/5/13 - * Time: 2:02 PM - */ -class ReadStateManager { - private final List samples; - private final PeekableIterator iterator; - private final SamplePartitioner samplePartitioner; - private final Map readStatesBySample = new HashMap(); - - private LinkedList submittedReads; - private final boolean keepSubmittedReads; - - private int totalReadStates = 0; - - public ReadStateManager(final Iterator source, - final List samples, - final LIBSDownsamplingInfo LIBSDownsamplingInfo, - final boolean keepSubmittedReads) { - this.samples = samples; - this.iterator = new PeekableIterator(source); - - this.keepSubmittedReads = keepSubmittedReads; - this.submittedReads = new LinkedList(); - - for (final String sample : samples) { - readStatesBySample.put(sample, new PerSampleReadStateManager(LIBSDownsamplingInfo)); - } - - samplePartitioner = new SamplePartitioner(LIBSDownsamplingInfo, samples); - } - - /** - * Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented - * for this iterator; if present, total read states will be decremented. - * - * @param sample The sample. - * @return Iterator over the reads associated with that sample. - */ - public Iterator iterator(final String sample) { - return new Iterator() { - private Iterator wrappedIterator = readStatesBySample.get(sample).iterator(); - - public boolean hasNext() { - return wrappedIterator.hasNext(); - } - - public SAMRecordAlignmentState next() { - return wrappedIterator.next(); - } - - public void remove() { - wrappedIterator.remove(); - } - }; - } - - public boolean isEmpty() { - return totalReadStates == 0; - } - - /** - * Retrieves the total number of reads in the manager across all samples. - * - * @return Total number of reads over all samples. - */ - public int size() { - return totalReadStates; - } - - /** - * Retrieves the total number of reads in the manager in the given sample. - * - * @param sample The sample. - * @return Total number of reads in the given sample. - */ - public int size(final String sample) { - return readStatesBySample.get(sample).size(); - } - - public SAMRecordAlignmentState getFirst() { - for (final String sample : samples) { - PerSampleReadStateManager reads = readStatesBySample.get(sample); - if (!reads.isEmpty()) - return reads.peek(); - } - return null; - } - - public boolean hasNext() { - return totalReadStates > 0 || iterator.hasNext(); - } - - // fast testing of position - private boolean readIsPastCurrentPosition(SAMRecord read) { - if (isEmpty()) - return false; - else { - SAMRecordAlignmentState state = getFirst(); - SAMRecord ourRead = state.getRead(); - return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition(); - } - } - - public void collectPendingReads() { - if (!iterator.hasNext()) - return; - - // the next record in the stream, peeked as to not remove it from the stream - if ( isEmpty() ) { - final int firstContigIndex = iterator.peek().getReferenceIndex(); - final int firstAlignmentStart = iterator.peek().getAlignmentStart(); - while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) { - submitRead(iterator.next()); - } - } else { - // Fast fail in the case that the read is past the current position. - if (readIsPastCurrentPosition(iterator.peek())) - return; - - while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) { - submitRead(iterator.next()); - } - } - - samplePartitioner.doneSubmittingReads(); - - for (final String sample : samples) { - Collection newReads = samplePartitioner.getReadsForSample(sample); - PerSampleReadStateManager statesBySample = readStatesBySample.get(sample); - addReadsToSample(statesBySample, newReads); - } - - samplePartitioner.reset(); - } - - /** - * Add a read to the sample partitioner, potentially adding it to all submitted reads, if appropriate - * @param read a non-null read - */ - @Requires("read != null") - protected void submitRead(final SAMRecord read) { - if ( keepSubmittedReads ) - submittedReads.add(read); - samplePartitioner.submitRead(read); - } - - /** - * Transfer current list of submitted reads, clearing old list - * - * Takes the maintained list of submitted reads, and transfers it to the caller of this - * function. The old list of set to a new, cleanly allocated list so the caller officially - * owns the list returned by this call. This is the only way to clear the tracking - * of submitted reads, if enabled. - * - * How to use this function: - * - * while ( doing some work unit, such as creating pileup at some locus ): - * interact with ReadStateManager in some way to make work unit - * readsUsedInPileup = transferSubmittedReads) - * - * @throws UnsupportedOperationException if called when keepSubmittedReads is false - * - * @return the current list of submitted reads - */ - @Ensures({ - "result != null", - "result != submittedReads" // result and previous submitted reads are not == objects - }) - public List transferSubmittedReads() { - if ( ! keepSubmittedReads ) throw new UnsupportedOperationException("cannot transferSubmittedReads if you aren't keeping them"); - - final List prevSubmittedReads = submittedReads; - this.submittedReads = new LinkedList(); - - return prevSubmittedReads; - } - - /** - * Are we keeping submitted reads, or not? - * @return true if we are keeping them, false otherwise - */ - public boolean isKeepingSubmittedReads() { - return keepSubmittedReads; - } - - /** - * Obtain a pointer to the list of submitted reads. - * - * This is not a copy of the list; it is shared with this ReadStateManager. It should - * not be modified. Updates to this ReadStateManager may change the contains of the - * list entirely. - * - * For testing purposes only. - * - * Will always be empty if we are are not keepSubmittedReads - * - * @return a non-null list of reads that have been submitted to this ReadStateManager - */ - @Ensures({"result != null","keepSubmittedReads || result.isEmpty()"}) - protected List getSubmittedReads() { - return submittedReads; - } - - /** - * Add reads with the given sample name to the given hanger entry. - * - * @param readStates The list of read states to add this collection of reads. - * @param reads Reads to add. Selected reads will be pulled from this source. - */ - private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection reads) { - if (reads.isEmpty()) - return; - - Collection newReadStates = new LinkedList(); - - for (SAMRecord read : reads) { - SAMRecordAlignmentState state = new SAMRecordAlignmentState(read); - state.stepForwardOnGenome(); - newReadStates.add(state); - } - - readStates.addStatesAtNextAlignmentStart(newReadStates); - } - - protected class PerSampleReadStateManager implements Iterable { - private List> readStatesByAlignmentStart = new LinkedList>(); - private final Downsampler> levelingDownsampler; - - private int thisSampleReadStates = 0; - - public PerSampleReadStateManager(final LIBSDownsamplingInfo LIBSDownsamplingInfo) { - this.levelingDownsampler = LIBSDownsamplingInfo.isPerformDownsampling() - ? new LevelingDownsampler, SAMRecordAlignmentState>(LIBSDownsamplingInfo.getToCoverage()) - : null; - } - - public void addStatesAtNextAlignmentStart(Collection states) { - if ( states.isEmpty() ) { - return; - } - - readStatesByAlignmentStart.add(new LinkedList(states)); - thisSampleReadStates += states.size(); - totalReadStates += states.size(); - - if ( levelingDownsampler != null ) { - levelingDownsampler.submit(readStatesByAlignmentStart); - levelingDownsampler.signalEndOfInput(); - - thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems(); - totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems(); - - // use returned List directly rather than make a copy, for efficiency's sake - readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems(); - levelingDownsampler.reset(); - } - } - - public boolean isEmpty() { - return readStatesByAlignmentStart.isEmpty(); - } - - public SAMRecordAlignmentState peek() { - return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek(); - } - - public int size() { - return thisSampleReadStates; - } - - public Iterator iterator() { - return new Iterator() { - private Iterator> alignmentStartIterator = readStatesByAlignmentStart.iterator(); - private LinkedList currentPositionReadStates = null; - private Iterator currentPositionReadStatesIterator = null; - - public boolean hasNext() { - return alignmentStartIterator.hasNext() || - (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext()); - } - - public SAMRecordAlignmentState next() { - if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) { - currentPositionReadStates = alignmentStartIterator.next(); - currentPositionReadStatesIterator = currentPositionReadStates.iterator(); - } - - return currentPositionReadStatesIterator.next(); - } - - public void remove() { - currentPositionReadStatesIterator.remove(); - thisSampleReadStates--; - totalReadStates--; - - if ( currentPositionReadStates.isEmpty() ) { - alignmentStartIterator.remove(); - } - } - }; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/SAMRecordAlignmentState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/SAMRecordAlignmentState.java deleted file mode 100644 index 9b51a8011..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/SAMRecordAlignmentState.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.locusiterator.old; - -import com.google.java.contract.Requires; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; - -/** - * Steps a single read along its alignment to the genome - * - * The logical model for generating extended events is as follows: the "record state" - * implements the traversal along the reference; thus stepForwardOnGenome() returns - * on every and only on actual reference bases. This can be a (mis)match or a deletion - * (in the latter case, we still return on every individual reference base the deletion spans). - * In the extended events mode, the record state also remembers if there was an insertion, or - * if the deletion just started *right before* the current reference base the record state is - * pointing to upon the return from stepForwardOnGenome(). The next call to stepForwardOnGenome() - * will clear that memory (as we remember only extended events immediately preceding - * the current reference base). - * - * User: depristo - * Date: 1/5/13 - * Time: 1:08 PM - */ -public class SAMRecordAlignmentState { - // TODO -- one idea to clean up this functionality: - // TODO -- - // TODO -- split functionality here into an alignment state machine and an - // TODO -- alignment state. The alignment state simply carries with it the - // TODO -- state of the alignment (the current cigar op, the genome offset, - // TODO -- the read offset, etc. The AlignmentStateMachine produces these - // TODO -- states, and has operations such stepForwardOnGenome, getLastState(), - // TODO -- getCurrentState(), getNextState(); - - /** - * Our read - */ - private final SAMRecord read; - private final Cigar cigar; - private final int nCigarElements; - - /** - * how far are we offset from the start of the read bases? - */ - int readOffset = -1; - - /** - * how far are we offset from the alignment start on the genome? - */ - int genomeOffset = -1; - - int cigarOffset = -1; - CigarElement curElement = null; - - /** - * how far are we into a single cigarElement? - */ - int cigarElementCounter = -1; - - @Requires("read != null") - // TODO -- should enforce contracts like the read is aligned, etc - public SAMRecordAlignmentState(final SAMRecord read) { - this.read = read; - this.cigar = read.getCigar(); - this.nCigarElements = cigar.numCigarElements(); - } - - public SAMRecord getRead() { - return read; - } - - /** - * What is our current offset in the read's bases that aligns us with the reference genome? - * - * @return the current read offset position - */ - public int getReadOffset() { - return readOffset; - } - - /** - * What is the current offset w.r.t. the alignment state that aligns us to the readOffset? - * - * @return the current offset - */ - public int getGenomeOffset() { - return genomeOffset; - } - - public int getGenomePosition() { - return read.getAlignmentStart() + getGenomeOffset(); - } - - public GenomeLoc getLocation(GenomeLocParser genomeLocParser) { - return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); - } - - public CigarOperator getCurrentCigarOperator() { - return curElement.getOperator(); - } - - public String toString() { - return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement); - } - - public CigarElement peekForwardOnGenome() { - return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ); - } - - public CigarElement peekBackwardOnGenome() { - return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement ); - } - - public CigarOperator stepForwardOnGenome() { - // we enter this method with readOffset = index of the last processed base on the read - // (-1 if we did not process a single base yet); this can be last matching base, - // or last base of an insertion - if (curElement == null || ++cigarElementCounter > curElement.getLength()) { - cigarOffset++; - if (cigarOffset < nCigarElements) { - curElement = cigar.getCigarElement(cigarOffset); - cigarElementCounter = 0; - // next line: guards against cigar elements of length 0; when new cigar element is retrieved, - // we reenter in order to re-check cigarElementCounter against curElement's length - return stepForwardOnGenome(); - } else { - if (curElement != null && curElement.getOperator() == CigarOperator.D) - throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); - - // Reads that contain indels model the genomeOffset as the following base in the reference. Because - // we fall into this else block only when indels end the read, increment genomeOffset such that the - // current offset of this read is the next ref base after the end of the indel. This position will - // model a point on the reference somewhere after the end of the read. - genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here: - // we do step forward on the ref, and by returning null we also indicate that we are past the read end. - - return null; - } - } - - boolean done = false; - switch (curElement.getOperator()) { - case H: // ignore hard clips - case P: // ignore pads - cigarElementCounter = curElement.getLength(); - break; - case I: // insertion w.r.t. the reference - case S: // soft clip - cigarElementCounter = curElement.getLength(); - readOffset += curElement.getLength(); - break; - case D: // deletion w.r.t. the reference - if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string - throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); - // should be the same as N case - genomeOffset++; - done = true; - break; - case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning) - genomeOffset++; - done = true; - break; - case M: - case EQ: - case X: - readOffset++; - genomeOffset++; - done = true; - break; - default: - throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); - } - - return done ? curElement.getOperator() : stepForwardOnGenome(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/SamplePartitioner.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/SamplePartitioner.java deleted file mode 100644 index 1f6c81f04..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/old/SamplePartitioner.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.locusiterator.old; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.downsampling.Downsampler; -import org.broadinstitute.sting.gatk.downsampling.PassThroughDownsampler; -import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler; -import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo; - -import java.util.*; - -/** - * Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler. - * - * Note: stores reads by sample ID string, not by sample object - */ -class SamplePartitioner { - private Map> readsBySample; - - public SamplePartitioner(final LIBSDownsamplingInfo LIBSDownsamplingInfo, final List samples) { - readsBySample = new HashMap>(samples.size()); - for ( String sample : samples ) { - readsBySample.put(sample, createDownsampler(LIBSDownsamplingInfo)); - } - } - - private Downsampler createDownsampler(final LIBSDownsamplingInfo LIBSDownsamplingInfo) { - return LIBSDownsamplingInfo.isPerformDownsampling() - ? new ReservoirDownsampler(LIBSDownsamplingInfo.getToCoverage()) - : new PassThroughDownsampler(); - } - - public void submitRead(SAMRecord read) { - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - if (readsBySample.containsKey(sampleName)) - readsBySample.get(sampleName).submit(read); - } - - public void doneSubmittingReads() { - for ( Map.Entry> perSampleReads : readsBySample.entrySet() ) { - perSampleReads.getValue().signalEndOfInput(); - } - } - - public Collection getReadsForSample(String sampleName) { - if ( ! readsBySample.containsKey(sampleName) ) - throw new NoSuchElementException("Sample name not found"); - - return readsBySample.get(sampleName).consumeFinalizedItems(); - } - - public void reset() { - for ( Map.Entry> perSampleReads : readsBySample.entrySet() ) { - perSampleReads.getValue().clear(); - perSampleReads.getValue().reset(); - } - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 461bbe37b..2f874540e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -67,32 +67,32 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { @Param private Downsampling downsampling; - public void timeDownsampling(int reps) { - for(int i = 0; i < reps; i++) { - SAMFileReader reader = new SAMFileReader(inputFile); - ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(inputFile,new Tags())), - reader.getFileHeader(), - SAMFileHeader.SortOrder.coordinate, - false, - SAMFileReader.ValidationStringency.SILENT, - downsampling.create(), - new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)), - Collections.emptyList(), - Collections.emptyList(), - false, - (byte)0, - false); - - GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); - // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? - Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter()); - LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups()); - while(locusIteratorByState.hasNext()) { - locusIteratorByState.next().getLocation(); - } - reader.close(); - } - } +// public void timeDownsampling(int reps) { +// for(int i = 0; i < reps; i++) { +// SAMFileReader reader = new SAMFileReader(inputFile); +// ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(inputFile,new Tags())), +// reader.getFileHeader(), +// SAMFileHeader.SortOrder.coordinate, +// false, +// SAMFileReader.ValidationStringency.SILENT, +// downsampling.create(), +// new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)), +// Collections.emptyList(), +// Collections.emptyList(), +// false, +// (byte)0, +// false); +// +// GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); +// // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? +// Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter()); +// LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups()); +// while(locusIteratorByState.hasNext()) { +// locusIteratorByState.next().getLocation(); +// } +// reader.close(); +// } +// } private enum Downsampling { NONE { diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachinePerformance.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachinePerformance.java index 0fa55c651..51f0de4e8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachinePerformance.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachinePerformance.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -78,24 +77,24 @@ public class AlignmentStateMachinePerformance { } } break; - case OLD_STATE: - { - final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read); - while ( alignmentStateMachine.stepForwardOnGenome() != null ) { - alignmentStateMachine.getRead(); - nIterations++; - } - } - break; +// case OLD_STATE: +// { +// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read); +// while ( alignmentStateMachine.stepForwardOnGenome() != null ) { +// alignmentStateMachine.getRead(); +// nIterations++; +// } +// } +// break; case NEW_LIBS: { - final List reads = Collections.nCopies(30, (SAMRecord)read); + final List reads = Collections.nCopies(30, (SAMRecord) read); final org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState libs = new org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState( new LocusIteratorByStateBaseTest.FakeCloseableIterator(reads.iterator()), LocusIteratorByStateBaseTest.createTestReadProperties(), genomeLocParser, - LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups()); + LocusIteratorByState.sampleListForSAMWithoutReadGroups()); while ( libs.hasNext() ) { AlignmentContext context = libs.next(); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java index 47a490f4f..5abe78ef7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -71,14 +70,29 @@ public class LocusIteratorBenchmark extends SimpleBenchmark { } } - public void timeOriginalLIBS(int rep) { +// public void timeOriginalLIBS(int rep) { +// for ( int i = 0; i < rep; i++ ) { +// final org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState libs = +// new org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState( +// new LocusIteratorByStateBaseTest.FakeCloseableIterator(reads.iterator()), +// LocusIteratorByStateBaseTest.createTestReadProperties(), +// genomeLocParser, +// LocusIteratorByState.sampleListForSAMWithoutReadGroups()); +// +// while ( libs.hasNext() ) { +// AlignmentContext context = libs.next(); +// } +// } +// } + + public void timeLegacyLIBS(int rep) { for ( int i = 0; i < rep; i++ ) { - final org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState libs = - new org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState( + final org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState libs = + new org.broadinstitute.sting.utils.locusiterator.legacy.LegacyLocusIteratorByState( new LocusIteratorByStateBaseTest.FakeCloseableIterator(reads.iterator()), LocusIteratorByStateBaseTest.createTestReadProperties(), genomeLocParser, - LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups()); + LocusIteratorByState.sampleListForSAMWithoutReadGroups()); while ( libs.hasNext() ) { AlignmentContext context = libs.next(); @@ -93,7 +107,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark { new LocusIteratorByStateBaseTest.FakeCloseableIterator(reads.iterator()), LocusIteratorByStateBaseTest.createTestReadProperties(), genomeLocParser, - LocusIteratorByStateBaseTest.sampleListForSAMWithoutReadGroups()); + LocusIteratorByState.sampleListForSAMWithoutReadGroups()); while ( libs.hasNext() ) { AlignmentContext context = libs.next(); @@ -101,16 +115,16 @@ public class LocusIteratorBenchmark extends SimpleBenchmark { } } - public void timeOriginalLIBSStateMachine(int rep) { - for ( int i = 0; i < rep; i++ ) { - for ( final SAMRecord read : reads ) { - final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read); - while ( alignmentStateMachine.stepForwardOnGenome() != null ) { - alignmentStateMachine.getGenomeOffset(); - } - } - } - } +// public void timeOriginalLIBSStateMachine(int rep) { +// for ( int i = 0; i < rep; i++ ) { +// for ( final SAMRecord read : reads ) { +// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read); +// while ( alignmentStateMachine.stepForwardOnGenome() != null ) { +// alignmentStateMachine.getGenomeOffset(); +// } +// } +// } +// } public void timeAlignmentStateMachine(int rep) { for ( int i = 0; i < rep; i++ ) { diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java index 6445f976f..5b9cdb112 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java @@ -57,22 +57,12 @@ public class LocusIteratorByStateBaseTest extends BaseTest { genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } - /** - * For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list - * for the system. - */ - public static List sampleListForSAMWithoutReadGroups() { - List samples = new ArrayList(); - samples.add(null); - return samples; - } - protected LocusIteratorByState makeLTBS(List reads, ReadProperties readAttributes) { return new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()), readAttributes, genomeLocParser, - sampleListForSAMWithoutReadGroups()); + LocusIteratorByState.sampleListForSAMWithoutReadGroups()); } public static ReadProperties createTestReadProperties() { diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java index 67916cfe4..78164e36b 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java @@ -27,9 +27,6 @@ package org.broadinstitute.sting.utils.locusiterator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.locusiterator.LIBSDownsamplingInfo; -import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest; -import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -65,7 +62,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest { } public void run() { - final List samples = sampleListForSAMWithoutReadGroups(); + final List samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups(); final Iterator iterator = new LinkedList().iterator(); ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false); ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java deleted file mode 100644 index 9fd2cdfeb..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/LocusIteratorByStateUnitTest.java +++ /dev/null @@ -1,463 +0,0 @@ -//package org.broadinstitute.sting.utils.locusiterator.old; -// -//import net.sf.samtools.*; -//import org.broadinstitute.sting.gatk.ReadProperties; -//import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -//import org.broadinstitute.sting.gatk.downsampling.DownsampleType; -//import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; -//import org.broadinstitute.sting.utils.NGSPlatform; -//import org.broadinstitute.sting.utils.Utils; -//import org.broadinstitute.sting.utils.locusiterator.LIBS_position; -//import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest; -//import org.broadinstitute.sting.utils.locusiterator.old.LocusIteratorByState; -//import org.broadinstitute.sting.utils.pileup.PileupElement; -//import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -//import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -//import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; -//import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -//import org.testng.Assert; -//import org.testng.annotations.DataProvider; -//import org.testng.annotations.Test; -// -//import java.util.*; -// -///** -// * testing of the new (non-legacy) version of LocusIteratorByState -// */ -//public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { -// -// // TODO -- REMOVE ME WHEN LIBS IS FIXED -// // TODO -- CURRENT CODE DOESN'T CORRECTLY COMPUTE THINGS LIKE BEFORE DELETION, AFTER INSERTION, ETC -// private final static boolean ALLOW_BROKEN_LIBS_STATE = true; -// -// protected LocusIteratorByState li; -// -// @Test -// public void testXandEQOperators() { -// final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; -// final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'}; -// -// // create a test version of the Reads object -// ReadProperties readAttributes = createTestReadProperties(); -// -// SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10); -// r1.setReadBases(bases1); -// r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); -// r1.setCigarString("10M"); -// -// SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10); -// r2.setReadBases(bases2); -// r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); -// r2.setCigarString("3=1X5=1X"); -// -// SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10); -// r3.setReadBases(bases2); -// r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); -// r3.setCigarString("3=1X5M1X"); -// -// SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10); -// r4.setReadBases(bases2); -// r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); -// r4.setCigarString("10M"); -// -// List reads = Arrays.asList(r1, r2, r3, r4); -// -// // create the iterator by state with the fake reads and fake records -// li = makeLTBS(reads,readAttributes); -// -// while (li.hasNext()) { -// AlignmentContext context = li.next(); -// ReadBackedPileup pileup = context.getBasePileup(); -// Assert.assertEquals(pileup.depthOfCoverage(), 4); -// } -// } -// -// @Test -// public void testIndelsInRegularPileup() { -// final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; -// final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'}; -// -// // create a test version of the Reads object -// ReadProperties readAttributes = createTestReadProperties(); -// -// SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); -// before.setReadBases(bases); -// before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); -// before.setCigarString("10M"); -// -// SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10); -// during.setReadBases(indelBases); -// during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); -// during.setCigarString("4M2I6M"); -// -// SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10); -// after.setReadBases(bases); -// after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); -// after.setCigarString("10M"); -// -// List reads = Arrays.asList(before, during, after); -// -// // create the iterator by state with the fake reads and fake records -// li = makeLTBS(reads,readAttributes); -// -// boolean foundIndel = false; -// while (li.hasNext()) { -// AlignmentContext context = li.next(); -// ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10); -// for (PileupElement p : pileup) { -// if (p.isBeforeInsertion()) { -// foundIndel = true; -// Assert.assertEquals(p.getLengthOfImmediatelyFollowingIndel(), 2, "Wrong event length"); -// Assert.assertEquals(p.getBasesOfImmediatelyFollowingInsertion(), "CT", "Inserted bases are incorrect"); -// break; -// } -// } -// -// } -// -// Assert.assertTrue(foundIndel,"Indel in pileup not found"); -// } -// -// @Test -// public void testWholeIndelReadInIsolation() { -// final int firstLocus = 44367789; -// -// // create a test version of the Reads object -// ReadProperties readAttributes = createTestReadProperties(); -// -// SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76); -// indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76)); -// indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76)); -// indelOnlyRead.setCigarString("76I"); -// -// List reads = Arrays.asList(indelOnlyRead); -// -// // create the iterator by state with the fake reads and fake records -// li = makeLTBS(reads, readAttributes); -// -// // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read -// // and considers it to be an indel-containing read. -// Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled"); -// AlignmentContext alignmentContext = li.next(); -// Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location."); -// ReadBackedPileup basePileup = alignmentContext.getBasePileup(); -// Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size"); -// Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect"); -// } -// -// /** -// * Test to make sure that reads supporting only an indel (example cigar string: 76I) do -// * not negatively influence the ordering of the pileup. -// */ -// @Test -// public void testWholeIndelRead() { -// final int firstLocus = 44367788, secondLocus = firstLocus + 1; -// -// SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76); -// leadingRead.setReadBases(Utils.dupBytes((byte)'A',76)); -// leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76)); -// leadingRead.setCigarString("1M75I"); -// -// SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76); -// indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76)); -// indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76)); -// indelOnlyRead.setCigarString("76I"); -// -// SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76); -// fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76)); -// fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76)); -// fullMatchAfterIndel.setCigarString("75I1M"); -// -// List reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel); -// -// // create the iterator by state with the fake reads and fake records -// li = makeLTBS(reads, createTestReadProperties()); -// int currentLocus = firstLocus; -// int numAlignmentContextsFound = 0; -// -// while(li.hasNext()) { -// AlignmentContext alignmentContext = li.next(); -// Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect"); -// -// if(currentLocus == firstLocus) { -// List readsAtLocus = alignmentContext.getBasePileup().getReads(); -// Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus); -// Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus); -// } -// else if(currentLocus == secondLocus) { -// List readsAtLocus = alignmentContext.getBasePileup().getReads(); -// Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus); -// Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus); -// Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus); -// } -// -// currentLocus++; -// numAlignmentContextsFound++; -// } -// -// Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts"); -// } -// -// /** -// * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly -// */ -// @Test -// public void testWholeIndelReadRepresentedTest() { -// final int firstLocus = 44367788, secondLocus = firstLocus + 1; -// -// SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1); -// read1.setReadBases(Utils.dupBytes((byte) 'A', 1)); -// read1.setBaseQualities(Utils.dupBytes((byte) '@', 1)); -// read1.setCigarString("1I"); -// -// List reads = Arrays.asList(read1); -// -// // create the iterator by state with the fake reads and fake records -// li = makeLTBS(reads, createTestReadProperties()); -// -// while(li.hasNext()) { -// AlignmentContext alignmentContext = li.next(); -// ReadBackedPileup p = alignmentContext.getBasePileup(); -// Assert.assertTrue(p.getNumberOfElements() == 1); -// PileupElement pe = p.iterator().next(); -// Assert.assertTrue(pe.isBeforeInsertion()); -// Assert.assertFalse(pe.isAfterInsertion()); -// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A"); -// } -// -// SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10); -// read2.setReadBases(Utils.dupBytes((byte) 'A', 10)); -// read2.setBaseQualities(Utils.dupBytes((byte) '@', 10)); -// read2.setCigarString("10I"); -// -// reads = Arrays.asList(read2); -// -// // create the iterator by state with the fake reads and fake records -// li = makeLTBS(reads, createTestReadProperties()); -// -// while(li.hasNext()) { -// AlignmentContext alignmentContext = li.next(); -// ReadBackedPileup p = alignmentContext.getBasePileup(); -// Assert.assertTrue(p.getNumberOfElements() == 1); -// PileupElement pe = p.iterator().next(); -// Assert.assertTrue(pe.isBeforeInsertion()); -// Assert.assertFalse(pe.isAfterInsertion()); -// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "AAAAAAAAAA"); -// } -// } -// -// //////////////////////////////////////////// -// // comprehensive LIBS/PileupElement tests // -// //////////////////////////////////////////// -// -// @DataProvider(name = "LIBSTest") -// public Object[][] makeLIBSTest() { -// final List tests = new LinkedList(); -// -// tests.add(new Object[]{new LIBSTest("1I", 1)}); -// tests.add(new Object[]{new LIBSTest("10I", 10)}); -// tests.add(new Object[]{new LIBSTest("2M2I2M", 6)}); -// tests.add(new Object[]{new LIBSTest("2M2I", 4)}); -// //TODO -- uncomment these when LIBS is fixed -// //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))}, -// //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))}, -// //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))}, -// //{new LIBSTest("1M2D2M", 3)}, -// tests.add(new Object[]{new LIBSTest("1S1M", 2)}); -// tests.add(new Object[]{new LIBSTest("1M1S", 2)}); -// tests.add(new Object[]{new LIBSTest("1S1M1I", 3)}); -// -// return tests.toArray(new Object[][]{}); -// -// // TODO -- enable combinatorial tests here when LIBS is fixed -//// return createLIBSTests( -//// Arrays.asList(1, 10), -//// Arrays.asList(1, 2, 3)); -// } -// -// @Test(dataProvider = "LIBSTest") -// public void testLIBS(LIBSTest params) { -// if ( params.getElements() == null || params.getElements().get(0).getOperator() == CigarOperator.I ) -// // TODO -- ENABLE ME WHEN LIBS IS FIXED -// return; -// -// // create the iterator by state with the fake reads and fake records -// final GATKSAMRecord read = params.makeRead(); -// li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties()); -// final LIBS_position tester = new LIBS_position(read); -// -// int bpVisited = 0; -// while ( li.hasNext() ) { -// bpVisited++; -// -// AlignmentContext alignmentContext = li.next(); -// ReadBackedPileup p = alignmentContext.getBasePileup(); -// Assert.assertTrue(p.getNumberOfElements() == 1); -// PileupElement pe = p.iterator().next(); -// -// tester.stepForwardOnGenome(); -// -// if ( ! ALLOW_BROKEN_LIBS_STATE ) { -// Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase); -// Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart); -// Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase); -// Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd); -// Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion); -// Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion); -// Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip); -// } -// -// Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset()); -// } -// -// // min is one because always visit something, even for 10I reads -// final int expectedBpToVisit = Math.max(read.getAlignmentEnd() - read.getAlignmentStart() + 1, 1); -// Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); -// } -// -// // ------------------------------------------------------------ -// // -// // Tests for keeping reads -// // -// // ------------------------------------------------------------ -// -// @DataProvider(name = "LIBSKeepSubmittedReads") -// public Object[][] makeLIBSKeepSubmittedReads() { -// final List tests = new LinkedList(); -// -// for ( final boolean doSampling : Arrays.asList(true, false) ) { -// for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) { -// for ( final int nLoci : Arrays.asList(1, 10, 25) ) { -// for ( final int nSamples : Arrays.asList(1, 2, 10) ) { -// for ( final boolean keepReads : Arrays.asList(true, false) ) { -// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) { -//// for ( final int nReadsPerLocus : Arrays.asList(1) ) { -//// for ( final int nLoci : Arrays.asList(10) ) { -//// for ( final int nSamples : Arrays.asList(1) ) { -//// for ( final boolean keepReads : Arrays.asList(true) ) { -//// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) { -// tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling}); -// } -// } -// } -// } -// } -// } -// -// return tests.toArray(new Object[][]{}); -// } -// -// @Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads") -// public void testLIBSKeepSubmittedReads(final int nReadsPerLocus, -// final int nLoci, -// final int nSamples, -// final boolean keepReads, -// final boolean grabReadsAfterEachCycle, -// final boolean downsample) { -// logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample)); -// final int readLength = 10; -// -// final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000); -// final List samples = new ArrayList(nSamples); -// for ( int i = 0; i < nSamples; i++ ) { -// final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg" + i); -// final String sample = "sample" + i; -// samples.add(sample); -// rg.setSample(sample); -// rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform()); -// header.addReadGroup(rg); -// } -// -// final int maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2; -// final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1); -// final DownsamplingMethod downsampler = downsample -// ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false) -// : new DownsamplingMethod(DownsampleType.NONE, null, null, false); -// final List reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength); -// li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()), -// createTestReadProperties(downsampler, keepReads), -// genomeLocParser, -// samples); -// -// final Set seenSoFar = new HashSet(); -// final Set keptReads = new HashSet(); -// int bpVisited = 0; -// while ( li.hasNext() ) { -// bpVisited++; -// final AlignmentContext alignmentContext = li.next(); -// final ReadBackedPileup p = alignmentContext.getBasePileup(); -// -// if ( downsample ) { -// // just not a safe test -// //Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling"); -// } else { -// final int minPileupSize = nReadsPerLocus * nSamples; -// Assert.assertTrue(p.getNumberOfElements() >= minPileupSize); -// } -// -// seenSoFar.addAll(p.getReads()); -// if ( keepReads && grabReadsAfterEachCycle ) { -// final List locusReads = li.transferReadsFromAllPreviousPileups(); -// -// // the number of reads starting here -// int nReadsStartingHere = 0; -// for ( final SAMRecord read : p.getReads() ) -// if ( read.getAlignmentStart() == alignmentContext.getPosition() ) -// nReadsStartingHere++; -// -// if ( downsample ) -// // with downsampling we might have some reads here that were downsampled away -// // in the pileup -// Assert.assertTrue(locusReads.size() >= nReadsStartingHere); -// else -// Assert.assertEquals(locusReads.size(), nReadsStartingHere); -// keptReads.addAll(locusReads); -// -// // check that all reads we've seen so far are in our keptReads -// for ( final SAMRecord read : seenSoFar ) { -// Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read); -// } -// } -// -// if ( ! keepReads ) -// Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty"); -// } -// -// if ( keepReads && ! grabReadsAfterEachCycle ) -// keptReads.addAll(li.transferReadsFromAllPreviousPileups()); -// -// if ( ! downsample ) { // downsampling may drop loci -// final int expectedBpToVisit = nLoci + readLength - 1; -// Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); -// } -// -// if ( keepReads ) { -// // check we have the right number of reads -// final int totalReads = nLoci * nReadsPerLocus * nSamples; -// if ( ! downsample ) { // downsampling may drop reads -// Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal"); -// -// // check that the order of reads is the same as in our read list -// for ( int i = 0; i < reads.size(); i++ ) { -// final SAMRecord inputRead = reads.get(i); -// final SAMRecord keptRead = reads.get(i); -// Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i); -// } -// } else { -// Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal"); -// } -// -// // check uniqueness -// final Set readNames = new HashSet(); -// for ( final SAMRecord read : keptReads ) { -// Assert.assertFalse(readNames.contains(read.getReadName()), "Found duplicate reads in the kept reads"); -// readNames.add(read.getReadName()); -// } -// -// // check that all reads we've seen are in our keptReads -// for ( final SAMRecord read : seenSoFar ) { -// Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read); -// } -// } -// } -//} diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/SAMRecordAlignmentStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/SAMRecordAlignmentStateUnitTest.java deleted file mode 100644 index 9835e6e9c..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/old/SAMRecordAlignmentStateUnitTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.locusiterator.old; - -import org.broadinstitute.sting.utils.locusiterator.LIBS_position; -import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByStateBaseTest; -import org.broadinstitute.sting.utils.locusiterator.old.SAMRecordAlignmentState; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.Arrays; - -/** - * testing of the new (non-legacy) version of LocusIteratorByState - */ -public class SAMRecordAlignmentStateUnitTest extends LocusIteratorByStateBaseTest { - @DataProvider(name = "AlignmentStateTest") - public Object[][] makeAlignmentStateTest() { -// return new Object[][]{{new LIBSTest("1I", 1)}}; - return createLIBSTests( - Arrays.asList(1, 2), - Arrays.asList(1, 2, 3, 4)); - } - - @Test(dataProvider = "AlignmentStateTest") - public void testAlignmentStateTest(LIBSTest params) { - final GATKSAMRecord read = params.makeRead(); - final SAMRecordAlignmentState state = new SAMRecordAlignmentState(read); - final LIBS_position tester = new LIBS_position(read); - - Assert.assertSame(state.getRead(), read); - Assert.assertNotNull(state.toString()); - - int bpVisited = 0; - int lastOffset = -1; - while ( state.stepForwardOnGenome() != null ) { - bpVisited++; - tester.stepForwardOnGenome(); - Assert.assertTrue(state.getReadOffset() >= lastOffset, "Somehow read offsets are decreasing: lastOffset " + lastOffset + " current " + state.getReadOffset()); - Assert.assertEquals(state.getReadOffset(), tester.getCurrentReadOffset(), "Read offsets are wrong at " + bpVisited); - - // TODO -- state.peekBackwardOnGenome(); - // TODO -- state.peekForwardOnGenome(); - // TODO -- state.getCurrentCigarOperator() - // TODO -- state.getGenomeOffset(); - // TODO -- state.getGenomePosition(); - // TODO -- Assert.assertEquals(state.getLocation(genomeLocParser), EXPECTATION); - - lastOffset = state.getReadOffset(); - } - - // min is one because always visit something, even for 10I reads - final int expectedBpToVisit = read.getAlignmentEnd() - read.getAlignmentStart() + 1; - Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); - } -}