From 2a5be1debe2dac1a969a14cb6e9dda45869a6873 Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 22 May 2009 19:12:00 +0000 Subject: [PATCH] Cleanup in datasources.providers namespace. Make it easier for others writing traversal engines to use. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@803 348d0f76-0448-11de-a6fe-93d51630548a --- .../dataSources/providers/AllLocusView.java | 87 ++++++++ .../providers/CoveredLocusView.java | 64 ++++++ .../providers/IterableLocusContextQueue.java | 119 ----------- .../providers/LocusContextQueue.java | 118 ---------- .../providers/LocusReferenceView.java | 70 ++++++ .../gatk/dataSources/providers/LocusView.java | 201 ++++++++++++++++++ .../providers/ReadReferenceView.java | 56 +++++ .../gatk/dataSources/providers/ReadView.java | 64 ++++++ .../providers/ReferenceOrderedView.java | 6 +- .../providers/ReferenceProvider.java | 102 --------- .../dataSources/providers/ReferenceView.java | 51 +++++ .../providers/SeekableLocusContextQueue.java | 124 ----------- .../providers/ShardDataProvider.java | 72 ++++--- .../gatk/dataSources/providers/View.java | 8 + .../GenomeLocusIterator.java} | 13 +- .../gatk/traversals/TraverseDuplicates.java | 3 +- .../sting/gatk/traversals/TraverseLoci.java | 57 +++-- .../sting/gatk/traversals/TraverseReads.java | 9 +- .../providers/AllLocusViewTest.java | 59 +++++ ...eueTest.java => CoveredLocusViewTest.java} | 48 ++--- ...ueTemplate.java => LocusViewTemplate.java} | 62 +++--- .../SeekableLocusContextQueueTest.java | 90 -------- 22 files changed, 794 insertions(+), 689 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusView.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusView.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java delete mode 100755 
java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceView.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusView.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceView.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadView.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java create mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceView.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java rename java/src/org/broadinstitute/sting/gatk/{dataSources/providers/ReferenceLocusIterator.java => iterators/GenomeLocusIterator.java} (77%) create mode 100755 java/test/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusViewTest.java rename java/test/org/broadinstitute/sting/gatk/dataSources/providers/{IterableLocusContextQueueTest.java => CoveredLocusViewTest.java} (52%) rename java/test/org/broadinstitute/sting/gatk/dataSources/providers/{LocusContextQueueTemplate.java => LocusViewTemplate.java} (82%) delete mode 100755 java/test/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueueTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusView.java new file mode 100755 index 000000000..7574caa23 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusView.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import java.util.NoSuchElementException; +import java.util.ArrayList; + +import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import 
org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMRecord; +/** + * User: hanna + * Date: May 13, 2009 + * Time: 3:32:30 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A LocusView that returns a context for each position yielded by the GenomeLocusIterator over the shard interval, substituting an empty context where no covered locus matches. + */ + +public class AllLocusView extends LocusView { + private GenomeLocusIterator locusIterator; + + /** + * The position the next call to next() will report; null once the view is exhausted. + */ + private GenomeLoc nextPosition = null; + + /** + * The most recently fetched covered locus context, held until the position iterator reaches it. + * Seeded with an empty context when the shard has no covered locus. + */ + private LocusContext nextLocusContext = null; + + /** + * Create a new view over the loci in the provider's shard. + * @param provider supplies the shard whose interval bounds the iteration. + */ + public AllLocusView(ShardDataProvider provider) { + super( provider ); + // Seed the state tracking members with the first possible seek position and the first possible locus context. + locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLoc() ); + if( locusIterator.hasNext() ) { + nextPosition = locusIterator.next(); + nextLocusContext = hasNextLocusContext() ? nextLocusContext() : createEmptyLocusContext(nextPosition); + } + } + + public boolean hasNext() { + return nextPosition != null; + } + + public LocusContext next() { + GenomeLoc currentPosition = nextPosition; + if( currentPosition == null ) + throw new NoSuchElementException("No next is available in the all locus view"); + + // Determine the next locus. + nextPosition = locusIterator.hasNext() ? locusIterator.next() : null; + + // Crank the iterator to (if possible) or past the next context. 
+ while( nextLocusContext != null && nextLocusContext.getLocation().isBefore(currentPosition) && hasNextLocusContext() ) + nextLocusContext = nextLocusContext(); + + // If actual data is present, return it. Otherwise, return empty data. + if( nextLocusContext != null && nextLocusContext.getLocation().equals(currentPosition) ) + return nextLocusContext; + else + return createEmptyLocusContext( currentPosition ); + } + + /** + * Creates a blank locus context (no reads, no offsets) at the specified location. + * @param site Site at which to create the blank locus context. + * @return empty context. + */ + private LocusContext createEmptyLocusContext( GenomeLoc site ) { + return new LocusContext(site, new ArrayList<SAMRecord>(), new ArrayList<Integer>()); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusView.java new file mode 100755 index 000000000..6827bb66e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusView.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; +import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.apache.log4j.Logger; +import net.sf.samtools.SAMRecord; + +import java.util.ArrayList; +import java.util.Iterator; + +import edu.mit.broad.picard.filter.FilteringIterator; +/** + * User: hanna + * Date: May 12, 2009 + * Time: 11:24:42 AM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. 
+ * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A LocusView that iterates only over the covered locus contexts bounded by the shard, + * delegating directly to the base class's hasNextLocusContext() / nextLocusContext(). + */ + +public class CoveredLocusView extends LocusView { + /** + * Not referenced anywhere in this class; apparently left over from the seekable queue this view replaces. + */ + private GenomeLoc seekPoint; + + /** + * Not referenced anywhere in this class; the base class keeps its own private + * nextLocusContext, which is what hasNext() and next() actually consult. + */ + private LocusContext nextLocusContext = null; + + private static Logger logger = Logger.getLogger(CoveredLocusView.class); + + /** + * Create a view of the covered loci in the provider's shard. + * @param provider source of the shard and reads; passed straight to LocusView. + */ + public CoveredLocusView(ShardDataProvider provider) { + super(provider); + } + + public boolean hasNext() { + return hasNextLocusContext(); + } + + public LocusContext next() { + return nextLocusContext(); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java deleted file mode 100755 index 65312d289..000000000 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java +++ /dev/null @@ -1,119 +0,0 @@ -package org.broadinstitute.sting.gatk.dataSources.providers; - -import java.util.NoSuchElementException; - -import org.broadinstitute.sting.gatk.iterators.LocusIterator; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.utils.GenomeLoc; -/** - * User: hanna - * Date: May 13, 2009 - * Time: 3:32:30 PM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. 
- * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * A LocusContextQueue over which the user can iterate. - */ - -public class IterableLocusContextQueue extends LocusContextQueue implements LocusIterator { - /** - * What's the context for the last locus accessed? - * @param provider - */ - private LocusContext prefetched = null; - - /** - * Has this prefetch been consumed? If this flag is set, - * the prefetch will skip to the next argument in the system. - */ - private boolean prefetchConsumed = true; - - /** - * Create a new queue of locus contexts. - * @param provider - */ - public IterableLocusContextQueue(ShardDataProvider provider) { - super( provider ); - } - - /** - * Is there another locus present in this iterator. - * @return True if another locus present in this iterator. Otherwise, false. - */ - public boolean hasNext() { - prefetchLocusContext(); - return prefetched != null; - } - - /** - * Retrieves the next element in the queue. - * @return Next element in the queue. - */ - public GenomeLoc next() { - prefetchLocusContext(); - prefetchConsumed = true; - // Signal that the prefetcher needs to grab another entry off the queue. - return prefetched.getLocation(); - } - - /** - * Find the next locus context within the bounds of a member variable and store - * it in the prefetched member variable. When the prefetch is consumed, the 'consumer' - * should signal it as such by marking prefetchConsumed = true. - */ - private void prefetchLocusContext() { - if( !prefetchConsumed ) - return; - - prefetched = null; - prefetchConsumed = false; - - // If another locus context bounded by this shard exists, find it. 
- boolean prefetchOutOfBounds = true; - while( hasNextLocusContext() && prefetchOutOfBounds ) { - prefetched = getNextLocusContext(); - prefetchOutOfBounds = (prefetched.getLocation().isBefore(shard.getGenomeLoc()) || - prefetched.getLocation().isPast(shard.getGenomeLoc())); - } - - // Can't find a valid prefetch? Set prefetch to null. If prefetched == null and - // prefetchConsumed == false, the queue is out of entries. - if( prefetchOutOfBounds ) - prefetched = null; - } - - /** - * Unsupported. - */ - public void remove() { - throw new UnsupportedOperationException("Unable to remove elements from this queue."); - } - - /** - * Peek at the next locus context in the chain. - * @return - */ - public LocusContext peek() { - if( prefetched == null ) - throw new NoSuchElementException("No more elements remaining in queue"); - return prefetched; - } - - /** - * Seek to the specified position in the contig. - * @param seekPoint - */ - public LocusContextQueue seek( GenomeLoc seekPoint ) { - if( prefetched == null || !seekPoint.equals(prefetched.getLocation()) ) - throw new IllegalArgumentException("IterableLocusContextQueue doesn't support seeking and iterator is in the wrong position."); - return this; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java deleted file mode 100755 index a8adf896f..000000000 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.broadinstitute.sting.gatk.dataSources.providers; - -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; -import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; -import 
org.broadinstitute.sting.gatk.traversals.TraversalStatistics; -import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; - -import java.util.Iterator; - -import edu.mit.broad.picard.filter.FilteringIterator; -import edu.mit.broad.picard.filter.SamRecordFilter; -/** - * User: hanna - * Date: May 13, 2009 - * Time: 3:30:16 PM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * A queue of locus context entries. - */ - -public abstract class LocusContextQueue { - protected Shard shard; - - private Reads sourceInfo; - private LocusContextIterator loci; - - public LocusContextQueue(ShardDataProvider provider) { - Iterator reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc()); - this.loci = new LocusContextIteratorByHanger(reads); - this.sourceInfo = provider.getReadIterator().getSourceInfo(); - this.shard = provider.getShard(); - } - - - /** - * Get the locus context at the given position. - * @return Locus context, or null if no locus context exists at this position. - */ - public abstract LocusContext peek(); - - /** - * Seek to the given point the queue of locus contexts. - * @param target Target base pair to which to seek. Must be a single base pair. - * @return an instance of itself for parameter chaining. - */ - public abstract LocusContextQueue seek(GenomeLoc target); - - /** - * Gets the next locus context, applying filtering as necessary. - * @return Locus context to work with. 
- */ - protected LocusContext getNextLocusContext() { - LocusContext next = loci.next(); - if( sourceInfo.getDownsampleToCoverage() != null ) - next.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); - return next; - } - - /** - * hasNext()-style iterator for base iterator. - * @return - */ - protected boolean hasNextLocusContext() { - return loci.hasNext(); - } - - /** - * Class to filter out un-handle-able reads from the stream. We currently are skipping - * unmapped reads, non-primary reads, unaligned reads, and duplicate reads. - */ - private static class LocusStreamFilterFunc implements SamRecordFilter { - SAMRecord lastRead = null; - public boolean filterOut(SAMRecord rec) { - boolean result = false; - String why = ""; - if (rec.getReadUnmappedFlag()) { - TraversalStatistics.nUnmappedReads++; - result = true; - why = "Unmapped"; - } else if (rec.getNotPrimaryAlignmentFlag()) { - TraversalStatistics.nNotPrimary++; - result = true; - why = "Not Primary"; - } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) { - TraversalStatistics.nBadAlignments++; - result = true; - why = "No alignment start"; - } else if (rec.getDuplicateReadFlag()) { - TraversalStatistics.nDuplicates++; - result = true; - why = "Duplicate reads"; - } - else { - result = false; - } - - if (result) { - TraversalStatistics.nSkippedReads++; - //System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why); - } else { - TraversalStatistics.nReads++; - } - return result; - } - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceView.java new file mode 100755 index 000000000..0c0388eb9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusReferenceView.java @@ -0,0 +1,70 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.utils.GenomeLoc; +import 
edu.mit.broad.picard.reference.ReferenceSequence; +import net.sf.samtools.util.StringUtil; +/** + * User: hanna + * Date: May 22, 2009 + * Time: 12:24:23 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Provides single-base access to the portion of the reference spanned by the shard. + */ +public class LocusReferenceView extends ReferenceView { + /** + * Bound the reference view to make sure all accesses are within the shard. + */ + private final GenomeLoc bounds; + + /** + * The reference bases spanning the shard, fetched once in the constructor. + * Offsets into it are computed relative to bounds.getStart(). + */ + private ReferenceSequence referenceSequence; + + /** + * Create a new locus reference view. + * @param provider source for locus data. + */ + public LocusReferenceView( ShardDataProvider provider ) { + super( provider ); + bounds = provider.getShard().getGenomeLoc(); + this.referenceSequence = reference.getSubsequenceAt( bounds.getContig(), + bounds.getStart(), + bounds.getStop() ); + } + + /** + * Gets the reference base associated with this particular point on the genome. + * @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region. + * @return The base at the position represented by this genomeLoc. + */ + public char getReferenceBase( GenomeLoc genomeLoc ) { + validateLocation( genomeLoc ); + int offset = (int)(genomeLoc.getStart() - bounds.getStart()); + return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0); + } + + /** + * Validates that the genomeLoc is one base wide and is in the reference sequence. + * @param genomeLoc location to verify. 
+ */ + private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException { + // Check the width first, then containment within the shard bounds. + if( !genomeLoc.isSingleBP() ) + throw new InvalidPositionException( + String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop())); + if( !bounds.containsP(genomeLoc) ) + throw new InvalidPositionException( + String.format("Requested position %s not within interval %s", genomeLoc, bounds)); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusView.java new file mode 100755 index 000000000..b34f28714 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusView.java @@ -0,0 +1,201 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; +import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; +import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; +import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMRecord; + +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Collection; +import java.util.Collections; +import java.util.Arrays; + +import edu.mit.broad.picard.filter.FilteringIterator; +import edu.mit.broad.picard.filter.SamRecordFilter; +/** + * User: hanna + * Date: May 13, 2009 + * Time: 3:30:16 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. 
+ */ + +/** + * Base class for views that iterate over the locus contexts bounded by a shard. + */ + +public abstract class LocusView extends LocusContextIterator implements View { + /** + * The shard bounding this view. + */ + protected Shard shard; + + /** + * Source info for this view. Informs the class about downsampling requirements. + */ + private Reads sourceInfo; + + /** + * The actual locus context iterator. + */ + private LocusContextIterator loci; + + /** + * The next locus context from the iterator. This value must always be within + * the shard; if it's null, there's nothing for the consumer to look at. + */ + private LocusContext nextLocusContext = null; + + public LocusView(ShardDataProvider provider) { + this.shard = provider.getShard(); + + Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc()); + this.sourceInfo = provider.getReadIterator().getSourceInfo(); + + this.loci = new LocusContextIteratorByHanger(reads); + seedNextLocusContext(); + + provider.register(this); + } + + /** + * Only one view of the locus is supported at any given time. + * @return LocusView and ReadView, the view types that may not coexist with this one. + */ + public Collection<Class<? extends View>> getConflictingViews() { + return Arrays.<Class<? extends View>>asList(LocusView.class,ReadView.class); + } + + /** + * Close this view. + */ + public void close() { + // Set everything to null with the hope of failing fast. + shard = null; + sourceInfo = null; + loci = null; + + super.close(); + } + + /** + * Is there another covered locus context bounded by this view. + * @return True if another covered locus context exists. False otherwise. + */ + public abstract boolean hasNext(); + + /** + * Returns the next covered locus context in the shard. + * @return Next covered locus context in the shard. + * @throws NoSuchElementException if no such element exists. + */ + public abstract LocusContext next(); + + /** + * Unsupported. + * @throws UnsupportedOperationException always. 
+ */ + public void remove() { + throw new UnsupportedOperationException("Unable to remove elements from this queue."); + } + + /** + * Is there another locus context bounded by this shard. + * @return True if another locus context is bounded by this shard. + */ + protected boolean hasNextLocusContext() { + return nextLocusContext != null && !nextLocusContext.getLocation().isPast(shard.getGenomeLoc()); + } + + /** + * Get the next locus context bounded by this shard, downsampled if the source info requests it. + * @return Next locus context bounded by this shard. + * @throws NoSuchElementException if the next element is missing. + */ + protected LocusContext nextLocusContext() { + if( nextLocusContext == null || nextLocusContext.getLocation().isPast(shard.getGenomeLoc()) ) + throw new NoSuchElementException("No more elements remain in locus context queue."); + + // Cache the current; downsampling is applied below, after the following context has been fetched. + LocusContext current = nextLocusContext; + + // Find the next. + if( loci.hasNext() ) { + nextLocusContext = loci.next(); + if( nextLocusContext.getLocation().isPast(shard.getGenomeLoc()) ) + nextLocusContext = null; + } + else + nextLocusContext = null; + + if( sourceInfo.getDownsampleToCoverage() != null ) + current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); + return current; + } + + /** + * Seed the nextLocusContext variable with the contents of the next locus context (if one exists). + */ + private void seedNextLocusContext() { + if( loci.hasNext() ) + nextLocusContext = loci.next(); + + // Iterate past cruft at the beginning to the first locus in the shard. + while( nextLocusContext != null && nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() ) + nextLocusContext = loci.next(); + + // If nothing in the shard was found, indicate that by setting nextLocusContext to null. 
+ if( nextLocusContext != null && nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) ) + nextLocusContext = null; + } + + /** + * Filter that rejects un-handle-able reads and tallies each decision in TraversalStatistics. Skips + * unmapped reads, non-primary alignments, reads with no alignment start, and duplicate reads. + */ + private static class LocusStreamFilterFunc implements SamRecordFilter { + SAMRecord lastRead = null; + public boolean filterOut(SAMRecord rec) { + boolean result = false; + String why = ""; + if (rec.getReadUnmappedFlag()) { + TraversalStatistics.nUnmappedReads++; + result = true; + why = "Unmapped"; + } else if (rec.getNotPrimaryAlignmentFlag()) { + TraversalStatistics.nNotPrimary++; + result = true; + why = "Not Primary"; + } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) { + TraversalStatistics.nBadAlignments++; + result = true; + why = "No alignment start"; + } else if (rec.getDuplicateReadFlag()) { + TraversalStatistics.nDuplicates++; + result = true; + why = "Duplicate reads"; + } + else { + result = false; + } + + if (result) { + TraversalStatistics.nSkippedReads++; + //System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why); + } else { + TraversalStatistics.nReads++; + } + return result; + } + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceView.java new file mode 100755 index 000000000..e7e27fdef --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadReferenceView.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; +import edu.mit.broad.picard.reference.ReferenceSequence; +/** + * User: hanna + * Date: May 22, 2009 + * Time: 12:36:14 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software 
and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Provides access to the reference over a single read. + */ + +public class ReadReferenceView extends ReferenceView { + /** + * Create a view of the reference with respect to a single read. + * @param provider source of the reference; passed to ReferenceView. + */ + public ReadReferenceView( ShardDataProvider provider ) { + super( provider ); + } + + /** + * Gets the bases of the reference that are aligned to the given read. + * @param read the read for which to extract reference information. + * @return The bases corresponding to this read, or null if the read is unmapped. + * If the alignment goes off the end of the contig, return just the portion + * mapped to the reference. + */ + public char[] getReferenceBases( SAMRecord read ) { + if( read.getReadUnmappedFlag() ) + return null; + + String contig = read.getReferenceName(); + int start = read.getAlignmentStart(); + int stop = read.getAlignmentEnd(); + // Clamp the stop to the contig length so an alignment running off the end is truncated. + SAMSequenceRecord sequenceRecord = reference.getSequenceDictionary().getSequence(contig); + if( stop > sequenceRecord.getSequenceLength() ) + stop = sequenceRecord.getSequenceLength(); + + ReferenceSequence alignmentToReference = reference.getSubsequenceAt( contig, start, stop ); + return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray(); + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadView.java new file mode 100755 index 000000000..86e90e3a4 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReadView.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; + +import 
java.util.Collection; +import java.util.Arrays; + +import net.sf.samtools.SAMRecord; +/** + * User: hanna + * Date: May 22, 2009 + * Time: 12:06:54 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A view into the reads that a provider can provide. + */ +public class ReadView implements View, Iterable<SAMRecord> { + /** + * The iterator into the reads supplied by this provider. + */ + private StingSAMIterator reads; + + /** + * Create a new view of the reads given the current data set. + * @param provider Source for the data. + */ + public ReadView( ShardDataProvider provider ) { + reads = provider.getReadIterator(); + } + + /** + * Other reads and loci conflict with this view. + * @return Collection containing ReadView and LocusView. + */ + public Collection<Class<? extends View>> getConflictingViews() { + return Arrays.<Class<? extends View>>asList(ReadView.class, LocusView.class); + } + + /** + * Close the view over these reads. Note that this method closes just + * the view into the reads, not the reads themselves. + */ + public void close() { + // Don't close the reads. The provider is responsible for this. + // Just dispose of the pointer. + reads = null; + } + + /** + * Gets an iterator into the reads supplied by this provider. + * @return Iterator into the reads that this provider covers. 
+ */ + public StingSAMIterator iterator() { + return reads; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java index bf8602366..4ffb96471 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceOrderedView.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import java.util.List; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; /** * User: hanna * Date: May 21, 2009 @@ -43,9 +45,11 @@ public class ReferenceOrderedView implements View { for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) states.add( new ReferenceOrderedDataState( dataSource, (ReferenceOrderedData.RODIterator)dataSource.seek(provider.getShard()) ) ); - provider.register(this); + provider.register(this); } + public Collection<Class<? extends View>> getConflictingViews() { return Collections.emptyList(); } + /** * Gets an object which can track the reference-ordered data at every locus. * @param loc Locus at which to track. 
diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java deleted file mode 100755 index 35ead8fd1..000000000 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java +++ /dev/null @@ -1,102 +0,0 @@ -package org.broadinstitute.sting.gatk.dataSources.providers; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import edu.mit.broad.picard.reference.ReferenceSequence; -import net.sf.samtools.util.StringUtil; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceRecord; - -/** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Apr 8, 2009 - * Time: 5:01:37 PM - * To change this template use File | Settings | File Templates. - */ -public class ReferenceProvider { - private IndexedFastaSequenceFile sequenceFile; - private Shard shard; - - /** - * Track the reference sequence and the last point accessed. Used to - * track state when traversing over the reference. - */ - private ReferenceSequence referenceSequence; - private GenomeLoc referenceInterval; - - /** - * Create a new reference provider supplying data from the given reference. - * @param sequenceFile Reference file to use. - * @param shard Shard over which to retrieve data. - */ - public ReferenceProvider( IndexedFastaSequenceFile sequenceFile, Shard shard ) { - this.sequenceFile = sequenceFile; - this.shard = shard; - } - - /** - * Gets the reference base at a single point. - * @param genomeLoc The location at which to fetch the reference base. - * @return The character representing the reference base. - * @throws InvalidPositionException in case the position is invalid. 
- */ - public char getReferenceBase( GenomeLoc genomeLoc ) throws InvalidPositionException { - if( referenceSequence == null ) - lazyInitializeLocusAccess(); - - validateLocation( genomeLoc ); - int offset = (int)(genomeLoc.getStart() - referenceInterval.getStart()); - return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0); - } - - /** - * Gets the bases of the reference that are aligned to the given read. - * @param read the read for which to extract reference information. - * @return The bases corresponding to this read, or null if the read is unmapped. - * If the alignment goes off the end of the contig, return just the portion - * mapped to the reference. - */ - public char[] getReferenceBases( SAMRecord read ) { - if( read.getReadUnmappedFlag() ) - return null; - - String contig = read.getReferenceName(); - int start = read.getAlignmentStart(); - int stop = read.getAlignmentEnd(); - - SAMSequenceRecord sequenceRecord = sequenceFile.getSequenceDictionary().getSequence(contig); - if( stop > sequenceRecord.getSequenceLength() ) - stop = sequenceRecord.getSequenceLength(); - - ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop ); - return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray(); - } - - /** - * Perform a lazy initialization of access to the locus. Sets up the reference sequence and - * limits the user to work only at that sequence. - */ - private void lazyInitializeLocusAccess() { - GenomeLoc position = shard.getGenomeLoc(); - this.referenceSequence = sequenceFile.getSubsequenceAt( position.getContig(), - position.getStart(), - position.getStop() ); - this.referenceInterval = position; - } - - /** - * Validates that the genomeLoc is one base wide and is in the reference sequence. - * @param genomeLoc location to verify. 
- */ - private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException { - // - if( !referenceInterval.containsP(genomeLoc) ) - throw new InvalidPositionException( - String.format("Requested position %s not within interval %s", genomeLoc, referenceInterval)); - if( genomeLoc.getStart() != genomeLoc.getStop() ) - throw new InvalidPositionException( - String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop())); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceView.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceView.java new file mode 100755 index 000000000..813d3c76a --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceView.java @@ -0,0 +1,51 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; + +import java.util.Collections; +import java.util.Collection; +/** + * User: hanna + * Date: May 22, 2009 + * Time: 12:19:17 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A view into the reference backing this shard. + */ +public class ReferenceView implements View { + /** + * The source of reference data. + */ + protected IndexedFastaSequenceFile reference = null; + + /** + * Create a new ReferenceView. + * @param provider + */ + public ReferenceView( ShardDataProvider provider ) { + this.reference = provider.getReference(); + } + + /** + * Reference views don't conflict with anything else. 
+ * @return Empty list. + */ + public Collection> getConflictingViews() { return Collections.emptyList(); } + + /** + * Deinitialize pointers for fast fail. Someone else will handle file management. + */ + public void close() { + reference = null; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java deleted file mode 100755 index ad1628104..000000000 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.broadinstitute.sting.gatk.dataSources.providers; - -import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; -import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.apache.log4j.Logger; -import net.sf.samtools.SAMRecord; - -import java.util.ArrayList; -import java.util.Iterator; - -import edu.mit.broad.picard.filter.FilteringIterator; -/** - * User: hanna - * Date: May 12, 2009 - * Time: 11:24:42 AM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * A queue of locus contexts. Provides unidirectional seek. Stripped down - * implementation of java.util.Queue interface. - */ - -public class SeekableLocusContextQueue extends LocusContextQueue { - /** - * Gets the position to which the last seek was requested. 
- */ - private GenomeLoc seekPoint; - - /** - * What's the context for the last locus accessed? - * @param provider - */ - private LocusContext nextLocusContext = null; - - private static Logger logger = Logger.getLogger(SeekableLocusContextQueue.class); - - /** - * Create a new queue of locus contexts. - * @param provider - */ - public SeekableLocusContextQueue(ShardDataProvider provider) { - super(provider); - - // Seed the state tracking members with the first possible seek position and the first possible locus context. - seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart()); - - if( hasNextLocusContext() ) - nextLocusContext = getNextLocusContext(); - else - nextLocusContext = this.createEmptyLocusContext(seekPoint); - } - - /** - * Get the locus context at the given position. - * @return Locus context, or null if no locus context exists at this position. - */ - public LocusContext peek() { - // Haven't reached the next locus context in the list yet. Return null. - if( seekPoint.isBefore(nextLocusContext.getLocation()) ) - return createEmptyLocusContext(seekPoint); - - return nextLocusContext; - } - - /** - * Seek to the given point the queue of locus contexts. - * @param target Target base pair to which to seek. Must be a single base pair. - * @return an instance of itself for parameter chaining. - */ - public LocusContextQueue seek(GenomeLoc target) { - if( !target.isSingleBP() ) - throw new IllegalArgumentException("Seek point must be a single base pair."); - - // If outside the range of the target, throw an illegal argument exception. - if( target.isBefore(shard.getGenomeLoc()) || target.isPast(shard.getGenomeLoc())) - throw new IllegalArgumentException(String.format("Target is out of range; target = %s, valid range = %s",target,shard.getGenomeLoc())); - - seekPoint = (GenomeLoc)target.clone(); - - // Search for the next locus context following the target positions. 
- while (nextLocusContext.getLocation().isBefore(target) && hasNextLocusContext() ) { - logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(), - target, - nextLocusContext.getLocation().compareTo(target))); - nextLocusContext = getNextLocusContext(); - } - - // Couldn't find a next? Force the nextLocusContext to null. - if( nextLocusContext.getLocation().isBefore(target) && !hasNextLocusContext() ) - nextLocusContext = createEmptyLocusContext( seekPoint ); - - return this; - } - - /** - * Gets the point to which the queue has currently seeked. - * @return Single bp position where the queue has been positioned. A locus context may or may not - * exist at this point. - */ - public GenomeLoc getSeekPoint() { - return seekPoint; - } - - /** - * Creates a blank locus context at the specified location. - * @param site Site at which to create the blank locus context. - * @return empty context. - */ - private LocusContext createEmptyLocusContext( GenomeLoc site ) { - return new LocusContext(site, new ArrayList(), new ArrayList()); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java index 1ee0c10dc..f24acc281 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ShardDataProvider.java @@ -3,16 +3,16 @@ package org.broadinstitute.sting.gatk.dataSources.providers; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.NullSAMIterator; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.ReferenceOrderedDataSource; +import 
org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.StingException; -import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.Collection; +import java.io.File; /** * User: hanna * Date: May 8, 2009 @@ -49,7 +49,7 @@ public class ShardDataProvider { /** * Provider of reference data for this particular shard. */ - private final ReferenceProvider referenceProvider; + private final IndexedFastaSequenceFile reference; /** * Sources of reference-ordered data. @@ -77,18 +77,25 @@ public class ShardDataProvider { * @return True if possible, false otherwise. */ public boolean hasReference() { - return referenceProvider != null; + return reference != null; } /** * Gets an iterator over all the reads bound by this shard. - * WARNING: Right now, this cannot be concurrently accessed with getLocusContext(). * @return An iterator over all reads in this shard. */ - public StingSAMIterator getReadIterator() { + StingSAMIterator getReadIterator() { return reads; } + /** + * Gets a pointer into the given indexed fasta sequence file. + * @return The indexed fasta sequence file. + */ + IndexedFastaSequenceFile getReference() { + return reference; + } + /** * Gets a window into the reference-ordered data. Package protected so that only * views can access it. @@ -98,35 +105,17 @@ public class ShardDataProvider { return referenceOrderedData; } - /** - * Gets the reference base associated with this particular point on the genome. - * @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region. - * @return The base at the position represented by this genomeLoc. 
- */ - public char getReferenceBase( GenomeLoc genomeLoc ) { - return referenceProvider.getReferenceBase(genomeLoc); - } - - /** - * Gets the reference sequence, as a char[], for the provided read. - * @param read the read to fetch the reference sequence for - * @return a char string of bases representing the reference sequence mapped to passed in read - */ - public char[] getReferenceForRead( SAMRecord read ) { - return referenceProvider.getReferenceBases(read); - } - /** * Create a data provider for the shard given the reads and reference. * @param shard The chunk of data over which traversals happen. - * @param reads A window into the reads for a given region. + * @param reads A window into the reads for a given region. * @param reference A getter for a section of the reference. */ public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference, List rods) { this.shard = shard; // Provide basic reads information. this.reads = (reads != null) ? reads.seek( shard ) : new NullSAMIterator(new Reads(new ArrayList())); - this.referenceProvider = (reference != null) ? new ReferenceProvider(reference,shard) : null; + this.reference = reference; this.referenceOrderedData = rods; } @@ -138,11 +127,36 @@ public class ShardDataProvider { ShardDataProvider( Shard shard, StingSAMIterator reads ) { this.shard = shard; this.reads = reads; - this.referenceProvider = null; + this.reference = null; this.referenceOrderedData = null; } + /** + * Register this view with the shard provider, and make sure it has no conflicts with any other views. + * @param view The new view. + */ void register( View view ) { + // Check all registered classes to see whether a conflict exists. 
+ for( View registeredView: registeredViews ) { + Collection> conflicts = registeredView.getConflictingViews(); + for( Class conflict: conflicts ) { + if( conflict.isInstance(view) ) + throw new StingException(String.format("Tried to registered two conflicting views: %s and %s", + registeredView.getClass().getSimpleName(), + view.getClass().getSimpleName())); + } + } + + // Check whether this class has any objection to any other classes. + for( Class conflict: view.getConflictingViews() ) { + for( View registeredView: registeredViews ) { + if( conflict.isInstance(registeredView) ) + throw new StingException(String.format("Tried to registered two conflicting views: %s and %s", + registeredView.getClass().getSimpleName(), + view.getClass().getSimpleName())); + } + } + this.registeredViews.add(view); } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java index 1db38e456..10f3e21d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/View.java @@ -1,4 +1,7 @@ package org.broadinstitute.sting.gatk.dataSources.providers; + +import java.util.List; +import java.util.Collection; /** * User: hanna * Date: May 21, 2009 @@ -16,6 +19,11 @@ package org.broadinstitute.sting.gatk.dataSources.providers; * Represents a view into given data. */ public interface View { + /** + * Gets a list of all types of views which can conflict with this view. + */ + public Collection> getConflictingViews(); + /** * Inform this view that the data provided to it no longer exists. 
*/ diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceLocusIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java similarity index 77% rename from java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceLocusIterator.java rename to java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index df2c8581d..bead63818 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceLocusIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -1,6 +1,5 @@ -package org.broadinstitute.sting.gatk.dataSources.providers; +package org.broadinstitute.sting.gatk.iterators; -import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; @@ -21,7 +20,7 @@ import java.util.NoSuchElementException; /** * Iterates through all of the loci provided in the reference. */ -public class ReferenceLocusIterator implements LocusIterator { +public class GenomeLocusIterator implements LocusIterator { /** * The entire region over which we're iterating. */ @@ -38,11 +37,9 @@ public class ReferenceLocusIterator implements LocusIterator { * @param provider Data provider to use as a backing source. * Provider must have a reference (hasReference() == true). 
*/ - public ReferenceLocusIterator( ShardDataProvider provider ) { - if( !provider.hasReference() ) - throw new StingException("Trying to iterate through reference, but no reference has been provided."); - completeLocus = provider.getShard().getGenomeLoc(); - currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart()); + public GenomeLocusIterator( GenomeLoc completeLocus ) { + this.completeLocus = completeLocus; + this.currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index f00a681e4..109c9269a 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.dataSources.providers.ReadView; import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; @@ -311,7 +312,7 @@ public class TraverseDuplicates extends TraversalEngine { // -> those with the same mate pair position, for paired reads // -> those flagged as unpaired and duplicated but having the same start and end and - FilteringIterator filterIter = new FilteringIterator(dataProvider.getReadIterator(), new duplicateStreamFilterFunc()); + FilteringIterator filterIter = new FilteringIterator(new ReadView(dataProvider).iterator(), new duplicateStreamFilterFunc()); PushbackIterator iter = new PushbackIterator(filterIter); return actuallyTraverse(dupWalker, iter, sum); } diff --git 
a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 0e0b91474..13e953d04 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -6,16 +6,16 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue; -import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue; -import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue; +import org.broadinstitute.sting.gatk.dataSources.providers.AllLocusView; +import org.broadinstitute.sting.gatk.dataSources.providers.CoveredLocusView; +import org.broadinstitute.sting.gatk.dataSources.providers.LocusView; import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceOrderedView; +import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceView; +import org.broadinstitute.sting.gatk.dataSources.providers.LocusReferenceView; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.apache.log4j.Logger; @@ -25,9 +25,7 @@ import java.util.ArrayList; import java.io.File; /** - * A simple, short-term solution to iterating over all reference positions over a series of - * 
genomic locations. Simply overloads the superclass traverse function to go over the entire - * interval's reference positions. + * A simple solution to iterating over all reference positions over a series of genomic locations. */ public class TraverseLoci extends TraversalEngine { @@ -59,36 +57,20 @@ public class TraverseLoci extends TraversalEngine { LocusWalker locusWalker = (LocusWalker)walker; - LocusIterator locusIterator = null; - LocusContextQueue locusContextQueue = null; + LocusView locusView = getLocusView( walker, dataProvider ); + LocusReferenceView referenceView = new LocusReferenceView( dataProvider ); ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider ); - DataSource dataSource = WalkerManager.getWalkerDataSource(walker); - switch( dataSource ) { - case REFERENCE: - locusIterator = new ReferenceLocusIterator( dataProvider ); - locusContextQueue = new SeekableLocusContextQueue( dataProvider ); - break; - case READS: - IterableLocusContextQueue iterableQueue = new IterableLocusContextQueue( dataProvider ); - locusIterator = iterableQueue; - locusContextQueue = iterableQueue; - break; - default: - throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource); - } - // We keep processing while the next reference location is within the interval - while( locusIterator.hasNext() ) { - GenomeLoc site = locusIterator.next(); + while( locusView.hasNext() ) { + LocusContext locus = locusView.next(); TraversalStatistics.nRecords++; // Iterate forward to get all reference ordered data covering this locus - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(site); + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation()); - LocusContext locus = locusContextQueue.seek( site ).peek(); - char refBase = dataProvider.getReferenceBase( site ); + char refBase = referenceView.getReferenceBase(locus.getLocation()); final 
boolean keepMeP = locusWalker.filter(tracker, refBase, locus); if (keepMeP) { @@ -116,4 +98,19 @@ public class TraverseLoci extends TraversalEngine { public void printOnTraversalDone( T sum ) { printOnTraversalDone( "loci", sum ); } + + /** + * Gets the best view of loci for this walker given the available data. + * @param walker walker to interrogate. + * @param dataProvider Data which which to drive the locus view. + */ + private LocusView getLocusView( Walker walker, ShardDataProvider dataProvider ) { + DataSource dataSource = WalkerManager.getWalkerDataSource(walker); + if( dataSource == DataSource.READS ) + return new CoveredLocusView(dataProvider); + else if( dataSource == DataSource.REFERENCE ) + return new AllLocusView(dataProvider); + else + throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 4ffc4685b..7f7c842bb 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -4,6 +4,8 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.dataSources.providers.ReadView; +import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView; import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; @@ -87,8 +89,11 @@ public class TraverseReads extends TraversalEngine { ReadWalker readWalker = (ReadWalker) walker; + ReadView reads = new ReadView(dataProvider); + ReadReferenceView reference = new ReadReferenceView(dataProvider); + // while we still 
have more reads - for (SAMRecord read : dataProvider.getReadIterator()) { + for (SAMRecord read : reads) { // our locus context LocusContext locus = null; @@ -105,7 +110,7 @@ public class TraverseReads extends TraversalEngine { // get the array of characters for the reference sequence, since we're a mapped read if( dataProvider.hasReference() ) - refSeq = dataProvider.getReferenceForRead( read ); + refSeq = reference.getReferenceBases( read ); } // update the number of reads we've seen diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusViewTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusViewTest.java new file mode 100755 index 000000000..a86a7e327 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/AllLocusViewTest.java @@ -0,0 +1,59 @@ +package org.broadinstitute.sting.gatk.dataSources.providers; + +import org.junit.Assert; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.LocusContext; +import net.sf.samtools.SAMRecord; + +import java.util.List; +/** + * User: hanna + * Date: May 12, 2009 + * Time: 2:34:46 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Test the view of all loci. + */ +public class AllLocusViewTest extends LocusViewTemplate { + + @Override + protected LocusView createView(ShardDataProvider provider) { + return new AllLocusView(provider); + } + + /** + * Test the reads according to an independently derived context. 
+ * @param view + * @param bounds + * @param reads + */ + @Override + protected void testReadsInContext( LocusView view, GenomeLoc bounds, List reads ) { + AllLocusView allLocusView = (AllLocusView)view; + + for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { + GenomeLoc site = new GenomeLoc("chr1",i); + LocusContext locusContext = allLocusView.next(); + Assert.assertEquals("Locus context location is incorrect", site, locusContext.getLocation() ); + int expectedReadsAtSite = 0; + + for( SAMRecord read: reads ) { + if(new GenomeLoc(read).containsP(locusContext.getLocation())) { + Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) ); + expectedReadsAtSite++; + } + } + + Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size()); + } + + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusViewTest.java similarity index 52% rename from java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java rename to java/test/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusViewTest.java index c4aa60a3e..043c96cff 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/CoveredLocusViewTest.java @@ -1,31 +1,11 @@ package org.broadinstitute.sting.gatk.dataSources.providers; -import org.junit.Test; import org.junit.Assert; -import org.junit.BeforeClass; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.LocusContext; -import 
org.broadinstitute.sting.BaseTest; import net.sf.samtools.SAMRecord; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMFileHeader; import java.util.List; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Collections; -import java.io.FileNotFoundException; - -import edu.mit.broad.picard.reference.ReferenceSequenceFile; -import edu.mit.broad.picard.reference.ReferenceSequence; /** * User: hanna * Date: May 12, 2009 @@ -40,24 +20,27 @@ import edu.mit.broad.picard.reference.ReferenceSequence; */ /** - * Test the locus context queue. + * Test the CoveredLocusView. */ -public class IterableLocusContextQueueTest extends LocusContextQueueTemplate { +public class CoveredLocusViewTest extends LocusViewTemplate { + /** + * Retrieve a covered locus view. + */ @Override - protected LocusContextQueue createQueue(ShardDataProvider provider) { - return new IterableLocusContextQueue(provider); + protected LocusView createView(ShardDataProvider provider) { + return new CoveredLocusView(provider); } /** * Test the reads according to an independently derived context. 
- * @param queue + * @param view * @param bounds * @param reads */ @Override - protected void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List reads ) { - IterableLocusContextQueue iterableQueue = (IterableLocusContextQueue)queue; + protected void testReadsInContext( LocusView view, GenomeLoc bounds, List reads ) { + CoveredLocusView coveredLocusView = (CoveredLocusView)view; for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { GenomeLoc site = new GenomeLoc("chr1",i); @@ -71,12 +54,9 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate { if( expectedReadsAtSite < 1 ) continue; - Assert.assertTrue("Incorrect number of loci in queue",iterableQueue.hasNext()); + Assert.assertTrue("Incorrect number of loci in view",coveredLocusView.hasNext()); - GenomeLoc nextLocus = iterableQueue.next(); - Assert.assertEquals("Next locus context returned is incorrect", site, nextLocus ); - - LocusContext locusContext = iterableQueue.seek(site).peek(); + LocusContext locusContext = coveredLocusView.next(); Assert.assertEquals("Target locus context location is incorrect", site, locusContext.getLocation() ); Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size()); @@ -86,6 +66,6 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate { } } - Assert.assertFalse("Iterator is not bounded at boundaries of shard", iterableQueue.hasNext()); - } + Assert.assertFalse("Iterator is not bounded at boundaries of shard", coveredLocusView.hasNext()); + } } diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusViewTemplate.java similarity index 82% rename from java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java rename to java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusViewTemplate.java 
index f26daede2..b7206aed3 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusViewTemplate.java @@ -40,10 +40,10 @@ import net.sf.samtools.CigarOperator; */ /** - * Base support for testing variants of the LocusContextQueue family of classes. + * Base support for testing variants of the LocusView family of classes. */ -public abstract class LocusContextQueueTemplate extends BaseTest { +public abstract class LocusViewTemplate extends BaseTest { protected static ReferenceSequenceFile sequenceSourceFile = null; @BeforeClass @@ -60,9 +60,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(shardBounds); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, shard.getGenomeLoc(), Collections.emptyList() ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.emptyList() ); } @Test @@ -74,9 +74,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(shardBounds); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) ); } @Test @@ -86,9 +86,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, shard.getGenomeLoc(), 
Collections.singletonList(read) ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) ); } @Test @@ -98,9 +98,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) ); } @Test @@ -110,9 +110,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) ); } @Test @@ -122,9 +122,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",6,15)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) ); } @Test @@ -134,9 +134,9 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); - testReadsInContext( queue, 
shard.getGenomeLoc(), Collections.singletonList(read) ); + testReadsInContext( view, shard.getGenomeLoc(), Collections.singletonList(read) ); } @Test @@ -147,11 +147,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); List expectedReads = new ArrayList(); Collections.addAll(expectedReads,read1,read2); - testReadsInContext( queue, shard.getGenomeLoc(), expectedReads ); + testReadsInContext( view, shard.getGenomeLoc(), expectedReads ); } @Test @@ -164,11 +164,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); List expectedReads = new ArrayList(); Collections.addAll(expectedReads,read1,read2,read3,read4); - testReadsInContext( queue, shard.getGenomeLoc(), expectedReads ); + testReadsInContext( view, shard.getGenomeLoc(), expectedReads ); } @Test @@ -181,11 +181,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); List expectedReads = new ArrayList(); Collections.addAll(expectedReads,read1,read2,read3,read4); - testReadsInContext( queue, shard.getGenomeLoc(), expectedReads ); + testReadsInContext( view, shard.getGenomeLoc(), expectedReads ); } @Test @@ -200,11 +200,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",1,10)); 
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); List expectedReads = new ArrayList(); Collections.addAll(expectedReads,read1,read2,read3,read4,read5,read6); - testReadsInContext( queue, shard.getGenomeLoc(), expectedReads ); + testReadsInContext( view, shard.getGenomeLoc(), expectedReads ); } @Test @@ -226,27 +226,27 @@ public abstract class LocusContextQueueTemplate extends BaseTest { Shard shard = new LocusShard(new GenomeLoc("chr1",6,15)); ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator ); - LocusContextQueue queue = createQueue( dataProvider ); + LocusView view = createView( dataProvider ); List expectedReads = new ArrayList(); Collections.addAll(expectedReads,read01,read02,read03,read04,read05,read06, read07,read08,read09,read10,read11,read12); - testReadsInContext( queue, shard.getGenomeLoc(), expectedReads ); + testReadsInContext( view, shard.getGenomeLoc(), expectedReads ); } /** - * Creates a queue of the type required for testing. - * @return The correct queue to test. + * Creates a view of the type required for testing. + * @return The correct view to test. */ - protected abstract LocusContextQueue createQueue( ShardDataProvider provider ); + protected abstract LocusView createView( ShardDataProvider provider ); /** * Test the reads according to an independently derived context. - * @param queue + * @param view * @param bounds * @param reads */ - protected abstract void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List reads ); + protected abstract void testReadsInContext( LocusView view, GenomeLoc bounds, List reads ); /** * Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data. 
diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueueTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueueTest.java deleted file mode 100755 index 80cb080ff..000000000 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueueTest.java +++ /dev/null @@ -1,90 +0,0 @@ -package org.broadinstitute.sting.gatk.dataSources.providers; - -import org.junit.Test; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.BaseTest; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMFileHeader; - -import java.util.List; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Collections; -import java.io.FileNotFoundException; - -import edu.mit.broad.picard.reference.ReferenceSequenceFile; -import edu.mit.broad.picard.reference.ReferenceSequence; -/** - * User: hanna - * Date: May 12, 2009 - * Time: 2:34:46 PM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * Test the locus context queue. 
- */ -public class SeekableLocusContextQueueTest extends LocusContextQueueTemplate { - - - - /** - * Retrieve a seekable locus context queue. - */ - @Override - protected LocusContextQueue createQueue(ShardDataProvider provider) { - return new SeekableLocusContextQueue(provider); - } - - /** - * Test the reads according to an independently derived context. - * @param queue - * @param bounds - * @param reads - */ - @Override - protected void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List reads ) { - SeekableLocusContextQueue seekableQueue = (SeekableLocusContextQueue)queue; - - Assert.assertEquals("Initial position of queue is incorrect", new GenomeLoc(bounds.getContig(),bounds.getStart()), seekableQueue.getSeekPoint() ); - - for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = new GenomeLoc("chr1",i); - seekableQueue.seek(site); - Assert.assertEquals("Seeked queue is incorrect", site, seekableQueue.getSeekPoint() ); - - LocusContext locusContext = seekableQueue.peek(); - Assert.assertEquals("Target locus context location is incorrect", site, locusContext.getLocation() ); - int expectedReadsAtSite = 0; - - for( SAMRecord read: reads ) { - if(new GenomeLoc(read).containsP(locusContext.getLocation())) { - Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) ); - expectedReadsAtSite++; - } - } - - Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size()); - } - - } - -}