Break locus context data access providers into modular components in preparation for traverse by loci.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@689 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7084ecdeb6
commit
12ae3a22b6
|
|
@ -1,74 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Apr 8, 2009
|
||||
* Time: 3:00:28 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class LocusContextProvider {
|
||||
private Iterator<SAMRecord> reads;
|
||||
|
||||
// What's the last locus accessed? Used for sanity checking.
|
||||
private GenomeLoc lastLoc = null;
|
||||
private LocusContextIterator loci;
|
||||
private LocusContext locus;
|
||||
protected static Logger logger = Logger.getLogger(LocusContextProvider.class);
|
||||
|
||||
public LocusContextProvider( Iterator<SAMRecord> reads ) {
|
||||
this.reads = new FilteringIterator(reads, new TraversalEngine.locusStreamFilterFunc());
|
||||
// prepare the iterator by loci from reads
|
||||
loci = new LocusContextIteratorByHanger(this.reads);
|
||||
}
|
||||
|
||||
public LocusContext getLocusContext( GenomeLoc loc ) {
|
||||
// Precondition checks
|
||||
if( lastLoc != null && !loc.isPast( lastLoc ) )
|
||||
throw new RuntimeException( "Internal error: LocusContextProvider assumes that queries it receives are ordered." );
|
||||
|
||||
if( (loc.getStop() - loc.getStart()) > 0 )
|
||||
throw new RuntimeException( "Internal error :LocusContextProviders currently require 1-base genomeLocs.");
|
||||
|
||||
// jump to the first reference site
|
||||
LocusContext locusContext = advanceReadsToLoc( loci, loc );
|
||||
|
||||
// if no locus context was found, create an empty locus
|
||||
if ( locusContext == null || locusContext.getLocation().compareTo( loc ) != 0 )
|
||||
locusContext = new LocusContext(loc, new ArrayList<SAMRecord>(), new ArrayList<Integer>());
|
||||
|
||||
lastLoc = loc;
|
||||
|
||||
return locusContext;
|
||||
}
|
||||
|
||||
private LocusContext advanceReadsToLoc(LocusContextIterator locusIter, GenomeLoc target) {
|
||||
if ( ! locusIter.hasNext() )
|
||||
return null;
|
||||
|
||||
if (locus == null) {
|
||||
locus = locusIter.next();
|
||||
}
|
||||
|
||||
while (target.isPast(locus.getLocation()) && locusIter.hasNext() ) {
|
||||
logger.debug(String.format(" current locus is %s vs %s => %d", locus.getLocation(), target, locus.getLocation().compareTo(target)));
|
||||
locus = locusIter.next();
|
||||
}
|
||||
|
||||
logger.debug(String.format(" returning %s", locus.getLocation()));
|
||||
return locus;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.apache.log4j.Logger;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 12, 2009
|
||||
* Time: 11:24:42 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A queue of locus contexts. Provides unidirectional seek. Stripped down
|
||||
* implementation of java.util.Queue interface.
|
||||
*/
|
||||
|
||||
public class LocusContextQueue {
|
||||
private Shard shard;
|
||||
private LocusContextIterator loci;
|
||||
|
||||
/**
|
||||
* Gets the position to which the last seek was requested.
|
||||
*/
|
||||
private GenomeLoc seekPoint;
|
||||
|
||||
/**
|
||||
* What's the context for the last locus accessed?
|
||||
* @param provider
|
||||
*/
|
||||
private LocusContext nextLocusContext = null;
|
||||
|
||||
private static Logger logger = Logger.getLogger(LocusContextQueue.class);
|
||||
|
||||
/**
|
||||
* Create a new queue of locus contexts, sorted in
|
||||
* @param provider
|
||||
*/
|
||||
public LocusContextQueue(ShardDataProvider provider) {
|
||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
||||
this.loci = new LocusContextIteratorByHanger(reads);
|
||||
this.shard = provider.getShard();
|
||||
|
||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||
seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart());
|
||||
|
||||
if( loci.hasNext() )
|
||||
nextLocusContext = loci.next();
|
||||
else
|
||||
nextLocusContext = this.createEmptyLocusContext(seekPoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the locus context at the given position.
|
||||
* @return Locus context, or null if no locus context exists at this position.
|
||||
*/
|
||||
public LocusContext peek() {
|
||||
// Haven't reached the next locus context in the list yet. Return null.
|
||||
if( seekPoint.isBefore(nextLocusContext.getLocation()) )
|
||||
return createEmptyLocusContext(seekPoint);
|
||||
|
||||
return nextLocusContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Seek to the given point the queue of locus contexts.
|
||||
* @param target Target base pair to which to seek. Must be a single base pair.
|
||||
* @return an instance of itself for parameter chaining.
|
||||
*/
|
||||
public LocusContextQueue seek(GenomeLoc target) {
|
||||
if( !target.isSingleBP() )
|
||||
throw new IllegalArgumentException("Seek point must be a single base pair.");
|
||||
|
||||
// If outside the range of the target, throw an illegal argument exception.
|
||||
if( target.isBefore(shard.getGenomeLoc()) || target.isPast(shard.getGenomeLoc()))
|
||||
throw new IllegalArgumentException(String.format("Target is out of range; target = %s, valid range = %s",target,shard.getGenomeLoc()));
|
||||
|
||||
seekPoint = (GenomeLoc)target.clone();
|
||||
|
||||
// Search for the next locus context following the target positions.
|
||||
while (nextLocusContext.getLocation().isBefore(target) && loci.hasNext() ) {
|
||||
logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(),
|
||||
target,
|
||||
nextLocusContext.getLocation().compareTo(target)));
|
||||
nextLocusContext = loci.next();
|
||||
}
|
||||
|
||||
// Couldn't find a next? Force the nextLocusContext to null.
|
||||
if( nextLocusContext.getLocation().isBefore(target) && !loci.hasNext() )
|
||||
nextLocusContext = createEmptyLocusContext( seekPoint );
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the point to which the queue has currently seeked.
|
||||
* @return Single bp position where the queue has been positioned. A locus context may or may not
|
||||
* exist at this point.
|
||||
*/
|
||||
public GenomeLoc getSeekPoint() {
|
||||
return seekPoint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a blank locus context at the specified location.
|
||||
* @param site Site at which to create the blank locus context.
|
||||
* @return empty context.
|
||||
*/
|
||||
private LocusContext createEmptyLocusContext( GenomeLoc site ) {
|
||||
return new LocusContext(site, new ArrayList<SAMRecord>(), new ArrayList<Integer>());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 12, 2009
|
||||
* Time: 10:52:47 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Iterates through all of the loci provided in the reference.
|
||||
*/
|
||||
public class ReferenceLocusIterator implements LocusIterator {
|
||||
/**
|
||||
* The entire region over which we're iterating.
|
||||
*/
|
||||
private GenomeLoc completeLocus;
|
||||
|
||||
/**
|
||||
* The current position in the traversal.
|
||||
*/
|
||||
private GenomeLoc currentLocus;
|
||||
|
||||
/**
|
||||
* Creates an iterator that can traverse over the entire
|
||||
* reference specified in the given ShardDataProvider.
|
||||
* @param provider Data provider to use as a backing source.
|
||||
* Provider must have a reference (hasReference() == true).
|
||||
*/
|
||||
public ReferenceLocusIterator( ShardDataProvider provider ) {
|
||||
if( !provider.hasReference() )
|
||||
throw new StingException("Trying to iterate through reference, but no reference has been provided.");
|
||||
completeLocus = provider.getShard().getGenomeLoc();
|
||||
currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the iterator still within the locus?
|
||||
* @return True if the iterator has more elements. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return !currentLocus.isPast(completeLocus);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next single-base locus context bounded by the iterator.
|
||||
* @return GenomeLoc representing the next single-base locus context.
|
||||
*/
|
||||
public GenomeLoc next() {
|
||||
if( !hasNext() )
|
||||
throw new NoSuchElementException("No elements remaining in bounded reference region.");
|
||||
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
|
||||
currentLocus.incPos();
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException( "ReferenceLocusIterator is read-only" );
|
||||
}
|
||||
}
|
||||
|
|
@ -2,9 +2,7 @@ package org.broadinstitute.sting.gatk.dataSources.providers;
|
|||
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -26,28 +24,28 @@ import net.sf.samtools.SAMRecord;
|
|||
* tries to assemble as much as possible with it.
|
||||
*/
|
||||
public class ShardDataProvider {
|
||||
/**
|
||||
* The shard over which we're providing data.
|
||||
*/
|
||||
private final Shard shard;
|
||||
|
||||
/**
|
||||
* The raw collection of reads.
|
||||
*/
|
||||
private final StingSAMIterator reads;
|
||||
|
||||
/**
|
||||
* Information about the locus. Can be accessed mutually exclusively
|
||||
* with the reads iterator.
|
||||
*/
|
||||
private final LocusContextProvider locusContextProvider;
|
||||
|
||||
/**
|
||||
* Provider of reference data for this particular shard.
|
||||
*/
|
||||
private final ReferenceProvider referenceProvider;
|
||||
|
||||
/**
|
||||
* Users should only drive using the reads directly or using the locus.
|
||||
* Perhaps in the future we can support direct simultaneous access to both.
|
||||
* Retrieves the shard associated with this data provider.
|
||||
* @return The shard associated with this data provider.
|
||||
*/
|
||||
private enum LastAccessType { READS, LOCUS };
|
||||
private LastAccessType lastAccessed = null;
|
||||
public Shard getShard() {
|
||||
return shard;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this data source provide reads?
|
||||
|
|
@ -57,14 +55,6 @@ public class ShardDataProvider {
|
|||
return reads != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this data source provide a locus context?
|
||||
* @return True if possible, false otherwise.
|
||||
*/
|
||||
public boolean hasLocusContext() {
|
||||
return locusContextProvider != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this data source provide reference information?
|
||||
* @return True if possible, false otherwise.
|
||||
|
|
@ -79,26 +69,9 @@ public class ShardDataProvider {
|
|||
* @return An iterator over all reads in this shard.
|
||||
*/
|
||||
public StingSAMIterator getReadIterator() {
|
||||
if( LastAccessType.LOCUS.equals(lastAccessed) )
|
||||
throw new UnsupportedOperationException("Cannot mix direct access to reads with access to locus context");
|
||||
lastAccessed = LastAccessType.READS;
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a locus context for a particular region on the genome.
|
||||
* WARNING: Right now, this cannot be concurrently accessed with the read iterator.
|
||||
* WARNING: Right now, accesses must be sequential along the genome.
|
||||
* @param genomeLoc The location for which to determine the context.
|
||||
* @return The context associated with this location.
|
||||
*/
|
||||
public LocusContext getLocusContext( GenomeLoc genomeLoc ) {
|
||||
if( LastAccessType.READS.equals(lastAccessed) )
|
||||
throw new UnsupportedOperationException("Cannot mix direct access to reads with access to locus context");
|
||||
lastAccessed = LastAccessType.LOCUS;
|
||||
return locusContextProvider.getLocusContext( genomeLoc );
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the reference base associated with this particular point on the genome.
|
||||
* @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region.
|
||||
|
|
@ -124,14 +97,23 @@ public class ShardDataProvider {
|
|||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ShardDataProvider( Shard shard, SAMDataSource reads, IndexedFastaSequenceFile reference ) {
|
||||
this.shard = shard;
|
||||
// Provide basic reads information.
|
||||
this.reads = reads.seek( shard );
|
||||
// Watch out! the locus context provider will start prefetching data off the queue. Only create this
|
||||
// if absolutely necessary.
|
||||
this.locusContextProvider = !(shard instanceof ReadShard) ? new LocusContextProvider(this.reads) : null;
|
||||
this.referenceProvider = (reference != null) ? new ReferenceProvider(reference,shard) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skeletal, package protected constructor for unit tests which require a ShardDataProvider.
|
||||
* @param shard the shard
|
||||
* @param reads reads iterator.
|
||||
*/
|
||||
ShardDataProvider( Shard shard, StingSAMIterator reads ) {
|
||||
this.shard = shard;
|
||||
this.reads = reads;
|
||||
this.referenceProvider = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retire this shard.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,24 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 12, 2009
|
||||
* Time: 10:48:56 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* An iterator for iterating through loci on a genome.
|
||||
*/
|
||||
|
||||
public interface LocusIterator extends Iterator<GenomeLoc> {
|
||||
}
|
||||
|
|
@ -3,20 +3,16 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -137,9 +133,6 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
|
||||
/**
|
||||
* Traverse by reads, given the data and the walker
|
||||
* @param walker the walker to execute over
|
||||
* @param shard the shard of data to feed the walker
|
||||
* @param locusProvider the factory for loci
|
||||
* @param sum of type T, the return from the walker
|
||||
* @param <M> the generic type
|
||||
* @param <T> the return type of the reduce function
|
||||
|
|
@ -294,7 +287,6 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
* Traverse by reads, given the data and the walker
|
||||
* @param walker the walker to execute over
|
||||
* @param shard the shard of data to feed the walker
|
||||
* @param locusProvider the factory for loci
|
||||
* @param sum of type T, the return from the walker
|
||||
* @param <M> the generic type
|
||||
* @param <T> the return type of the reduce function
|
||||
|
|
@ -302,8 +294,7 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
*/
|
||||
public <M, T> T traverse(Walker<M, T> walker,
|
||||
Shard shard,
|
||||
LocusContextProvider locusProvider,
|
||||
BoundedReadIterator readIter,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
|
||||
logger.debug(String.format("TraverseDuplicates.traverse Genomic interval is %s", ((ReadShard)shard).getSize()));
|
||||
|
|
@ -319,8 +310,8 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
// -> those with the same mate pair position, for paired reads
|
||||
// -> those flagged as unpaired and duplicated but having the same start and end and
|
||||
|
||||
FilteringIterator filterIter = new FilteringIterator(readIter, new duplicateStreamFilterFunc());
|
||||
FilteringIterator filterIter = new FilteringIterator(dataProvider.getReadIterator(), new duplicateStreamFilterFunc());
|
||||
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(filterIter);
|
||||
return actuallyTraverse(dupWalker, readIter, sum);
|
||||
return actuallyTraverse(dupWalker, iter, sum);
|
||||
}
|
||||
}
|
||||
|
|
@ -4,14 +4,13 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.InvalidPositionException;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -57,18 +56,20 @@ public class TraverseLociByReference extends TraversalEngine {
|
|||
throw new IllegalArgumentException("Walker isn't a loci walker!");
|
||||
|
||||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||
GenomeLoc loc = shard.getGenomeLoc();
|
||||
|
||||
LocusIterator locusIterator = new ReferenceLocusIterator( dataProvider );
|
||||
LocusContextQueue locusContextQueue = new LocusContextQueue( dataProvider );
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
for( long pos = loc.getStart(); pos <= loc.getStop(); pos++ ) {
|
||||
GenomeLoc site = new GenomeLoc( loc.getContig(), pos );
|
||||
while( locusIterator.hasNext() ) {
|
||||
GenomeLoc site = locusIterator.next();
|
||||
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus( site );
|
||||
|
||||
LocusContext locus = dataProvider.getLocusContext( site );
|
||||
LocusContext locus = locusContextQueue.seek( site ).peek();
|
||||
char refBase = dataProvider.getReferenceBase( site );
|
||||
|
||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import java.util.regex.Pattern;
|
|||
*
|
||||
*
|
||||
*/
|
||||
public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||
public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
||||
private static Logger logger = Logger.getLogger(GenomeLoc.class);
|
||||
|
||||
private Integer contigIndex;
|
||||
|
|
@ -432,6 +432,11 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
|
||||
}
|
||||
|
||||
public final boolean isBefore( GenomeLoc that ) {
|
||||
int comparison = this.compareContigs(that);
|
||||
return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));
|
||||
}
|
||||
|
||||
public final boolean isPast( GenomeLoc that ) {
|
||||
int comparison = this.compareContigs(that);
|
||||
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() ));
|
||||
|
|
@ -451,16 +456,32 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
return n;
|
||||
}
|
||||
|
||||
// Dangerous
|
||||
// public boolean equals(Object o) {
|
||||
// // Not strictly necessary, but often a good optimization
|
||||
// if (this == o)
|
||||
// return true;
|
||||
// if (!(o instanceof GenomeLoc))
|
||||
// return false;
|
||||
// else
|
||||
// return compareContigs((GenomeLoc)o) == 0;
|
||||
// }
|
||||
/**
|
||||
* Check to see whether two genomeLocs are equal.
|
||||
* Note that this implementation ignores the contigInfo object.
|
||||
* @param other Other contig to compare.
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null)
|
||||
return false;
|
||||
if(other instanceof GenomeLoc) {
|
||||
GenomeLoc otherGenomeLoc = (GenomeLoc)other;
|
||||
return this.contigIndex.equals(otherGenomeLoc.contigIndex) &&
|
||||
this.start == otherGenomeLoc.start &&
|
||||
this.stop == otherGenomeLoc.stop;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new GenomeLoc at this same position.
|
||||
* @return A GenomeLoc with the same contents as the current loc.
|
||||
*/
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new GenomeLoc(this);
|
||||
}
|
||||
|
||||
//
|
||||
// Comparison operations
|
||||
|
|
|
|||
|
|
@ -0,0 +1,320 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.Collections;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 12, 2009
|
||||
* Time: 2:34:46 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test the locus context queue.
|
||||
*/
|
||||
public class LocusContextQueueTest extends BaseTest {
|
||||
private static ReferenceSequenceFile sequenceSourceFile = null;
|
||||
|
||||
@BeforeClass
|
||||
public static void setupGenomeLoc() throws FileNotFoundException {
|
||||
sequenceSourceFile = fakeReferenceSequenceFile();
|
||||
GenomeLoc.setupRefContigOrdering(sequenceSourceFile);
|
||||
}
|
||||
|
||||
private static ReferenceSequenceFile fakeReferenceSequenceFile() {
|
||||
return new ReferenceSequenceFile() {
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("chr1");
|
||||
SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Collections.singletonList(sequenceRecord));
|
||||
return dictionary;
|
||||
}
|
||||
public ReferenceSequence nextSequence() { throw new UnsupportedOperationException("Fake implementation doesn't support a getter"); }
|
||||
};
|
||||
}
|
||||
|
||||
@Test
|
||||
public void emptyLocusContextTest() {
|
||||
SAMRecordIterator iterator = new SAMRecordIterator();
|
||||
|
||||
GenomeLoc shardBounds = new GenomeLoc("chr1",1,5);
|
||||
Shard shard = new LocusShard(shardBounds);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList() );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void singleReadTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1",1,5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
GenomeLoc shardBounds = new GenomeLoc("chr1",1,5);
|
||||
Shard shard = new LocusShard(shardBounds);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readCoveringFirstPartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1",1,5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readCoveringLastPartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readCoveringMiddleTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1",3,7);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readOverlappingStartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1",1,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readOverlappingEndTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1",6,15);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), Collections.singletonList(read) );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readsSpanningTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1",1,5);
|
||||
SAMRecord read2 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads,read1,read2);
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void duplicateReadsTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1",1,5);
|
||||
SAMRecord read2 = buildSAMRecord("chr1",1,5);
|
||||
SAMRecord read3 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecord read4 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads,read1,read2,read3,read4);
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cascadingReadsWithinBoundsTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1",2,6);
|
||||
SAMRecord read2 = buildSAMRecord("chr1",3,7);
|
||||
SAMRecord read3 = buildSAMRecord("chr1",4,8);
|
||||
SAMRecord read4 = buildSAMRecord("chr1",5,9);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads,read1,read2,read3,read4);
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cascadingReadsAtBoundsTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1",1,5);
|
||||
SAMRecord read2 = buildSAMRecord("chr1",2,6);
|
||||
SAMRecord read3 = buildSAMRecord("chr1",3,7);
|
||||
SAMRecord read4 = buildSAMRecord("chr1",4,8);
|
||||
SAMRecord read5 = buildSAMRecord("chr1",5,9);
|
||||
SAMRecord read6 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4,read5,read6);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads,read1,read2,read3,read4,read5,read6);
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cascadingReadsOverlappingBoundsTest() {
|
||||
SAMRecord read01 = buildSAMRecord("chr1",1,5);
|
||||
SAMRecord read02 = buildSAMRecord("chr1",2,6);
|
||||
SAMRecord read03 = buildSAMRecord("chr1",3,7);
|
||||
SAMRecord read04 = buildSAMRecord("chr1",4,8);
|
||||
SAMRecord read05 = buildSAMRecord("chr1",5,9);
|
||||
SAMRecord read06 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecord read07 = buildSAMRecord("chr1",7,11);
|
||||
SAMRecord read08 = buildSAMRecord("chr1",8,12);
|
||||
SAMRecord read09 = buildSAMRecord("chr1",9,13);
|
||||
SAMRecord read10 = buildSAMRecord("chr1",10,14);
|
||||
SAMRecord read11 = buildSAMRecord("chr1",11,15);
|
||||
SAMRecord read12 = buildSAMRecord("chr1",12,16);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read01,read02,read03,read04,read05,read06,
|
||||
read07,read08,read09,read10,read11,read12);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusContextQueue queue = new LocusContextQueue( dataProvider );
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads,read01,read02,read03,read04,read05,read06,
|
||||
read07,read08,read09,read10,read11,read12);
|
||||
testReadsInContext( queue, shard.getGenomeLoc(), expectedReads );
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the reads according to an independently derived context.
|
||||
* @param queue
|
||||
* @param bounds
|
||||
* @param reads
|
||||
*/
|
||||
private void testReadsInContext( LocusContextQueue queue, GenomeLoc bounds, List<SAMRecord> reads ) {
|
||||
Assert.assertEquals("Initial position of queue is incorrect", new GenomeLoc(bounds.getContig(),bounds.getStart()), queue.getSeekPoint() );
|
||||
|
||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||
GenomeLoc site = new GenomeLoc("chr1",i);
|
||||
queue.seek(site);
|
||||
Assert.assertEquals("Seeked queue is incorrect", site, queue.getSeekPoint() );
|
||||
|
||||
LocusContext locusContext = queue.peek();
|
||||
Assert.assertEquals("Target locus context location is incorrect", site, locusContext.getLocation() );
|
||||
int expectedReadsAtSite = 0;
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
if(new GenomeLoc(read).containsP(locusContext.getLocation())) {
|
||||
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
||||
expectedReadsAtSite++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private SAMRecord buildSAMRecord( String contig, int alignmentStart, int alignmentEnd ) {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary());
|
||||
|
||||
SAMRecord record = new SAMRecord(header);
|
||||
|
||||
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
Cigar cigar = new Cigar();
|
||||
cigar.add(new CigarElement(alignmentEnd-alignmentStart+1,CigarOperator.M));
|
||||
record.setCigar(cigar);
|
||||
return record;
|
||||
}
|
||||
|
||||
private class SAMRecordIterator implements StingSAMIterator {
|
||||
private Iterator<SAMRecord> backingIterator = null;
|
||||
|
||||
public SAMRecordIterator( SAMRecord... reads ) {
|
||||
List<SAMRecord> backingList = new ArrayList<SAMRecord>();
|
||||
backingList.addAll(Arrays.asList(reads));
|
||||
backingIterator = backingList.iterator();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return backingIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
return backingIterator.next();
|
||||
}
|
||||
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can't remove from a read-only iterator");
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue