Basic support for very simple index-driven locus traversals. Interface has been changed to

support batched intervals in a single shard, but intervals are not yet compressed into a single
shard.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2730 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-01-29 03:14:26 +00:00
parent 4810e9c9cd
commit 3d922a019f
36 changed files with 439 additions and 266 deletions

View File

@ -184,6 +184,19 @@ class BAMFileReader2
return mCurrentIterator;
}
/**
 * Finds the chunks of this BAM file that the index associates with the given interval.
 * The sequence name is translated to a reference index through the file header; the
 * index is only consulted when that translation does not yield -1.
 *
 * @param sequence name of the reference sequence to look up in the file header.
 * @param start    start coordinate handed unchanged to the index search.
 * @param end      end coordinate handed unchanged to the index search.
 * @return the result of Chunk.toChunkList applied to the file pointers found by the index,
 *         or applied to null when the header lookup returned -1.
 */
public List<Chunk> getOverlappingFilePointers(final String sequence, final int start, final int end) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    // A lookup result of -1 skips the index entirely and leaves the pointer array null
    // (presumably the sequence is not in the header -- confirm against SAMFileHeader).
    final long[] searchBins = (sequenceIndex != -1)
            ? getFileIndex().getSearchBins(sequenceIndex, start, end)
            : null;
    return Chunk.toChunkList(searchBins);
}
/**
* Prepare to iterate through the SAMRecords that match the given interval.
* Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed

View File

@ -199,9 +199,16 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
*/
public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) {
// TODO: Add sanity checks so that we're not doing this against a BAM file.
if(!(mReader instanceof ReaderImplementation2))
if(!(mReader instanceof BAMFileReader2))
throw new PicardException("This call requires a ReaderImplementation2-compliant interface");
return ((ReaderImplementation2)mReader).getIterator(chunks);
return ((BAMFileReader2)mReader).getIterator(chunks);
}
/**
 * Asks the underlying reader for the file chunks overlapping the given interval.
 * The request can only be delegated when the underlying reader is a BAMFileReader2.
 *
 * @param sequence name of the reference sequence.
 * @param start    start coordinate, forwarded unchanged.
 * @param end      end coordinate, forwarded unchanged.
 * @return the chunk list produced by the underlying BAMFileReader2.
 * @throws PicardException if the underlying reader is not a BAMFileReader2.
 */
public List<Chunk> getOverlappingFilePointers(final String sequence, final int start, final int end) {
    if (mReader instanceof BAMFileReader2) {
        final BAMFileReader2 bamReader = (BAMFileReader2) mReader;
        return bamReader.getOverlappingFilePointers(sequence, start, end);
    }
    // NOTE(review): the message below still names ReaderImplementation2 although the check
    // above is against BAMFileReader2; left untouched because it is runtime text.
    throw new PicardException("This call requires a ReaderImplementation2-compliant interface");
}
/**

View File

@ -609,9 +609,8 @@ public class GenomeAnalysisEngine {
shardType = (walker.isReduceByInterval()) ?
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
shardType,
argCollection.experimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
intervals, maxIterations);

View File

@ -46,7 +46,7 @@ public class AllLocusView extends LocusView {
public AllLocusView(ShardDataProvider provider) {
super( provider );
// Seed the state tracking members with the first possible seek position and the first possible locus context.
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLoc() );
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLocs() );
if( locusIterator.hasNext() ) {
nextPosition = locusIterator.next();
nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition);

View File

@ -9,6 +9,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -69,7 +71,7 @@ public class LocusReferenceView extends ReferenceView {
*/
public LocusReferenceView( ShardDataProvider provider ) {
super(provider);
bounds = provider.getShard().getGenomeLoc();
initializeBounds(provider);
windowStart = windowStop = 0;
initializeReferenceSequence(bounds);
}
@ -80,7 +82,7 @@ public class LocusReferenceView extends ReferenceView {
*/
public LocusReferenceView( Walker walker, ShardDataProvider provider ) {
super( provider );
bounds = provider.getShard().getGenomeLoc();
initializeBounds(provider);
// Retrieve information about the window being accessed.
if( walker.getClass().isAnnotationPresent(Reference.class) ) {
@ -131,6 +133,22 @@ public class LocusReferenceView extends ReferenceView {
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
}
/**
 * Derives the bounds of this view from the loci of the provider's shard.
 * With no loci the bounds are cleared to null; otherwise the bounds run from the start of
 * the first locus to the stop of the last locus in the list.
 *
 * @param provider source of the shard whose loci define the bounds.
 * @throws StingException if the first and last locus report different contig indices.
 */
private void initializeBounds(ShardDataProvider provider) {
    final List<GenomeLoc> shardLoci = provider.getShard().getGenomeLocs();
    if (!shardLoci.isEmpty()) {
        final GenomeLoc head = shardLoci.get(0);
        final GenomeLoc tail = shardLoci.get(shardLoci.size() - 1);
        // Only the two ends of the list are compared; presumably the list is sorted so that
        // matching ends imply a single contig throughout -- TODO confirm.
        if (head.getContigIndex() == tail.getContigIndex()) {
            bounds = GenomeLocParser.createGenomeLoc(head.getContig(), head.getStart(), tail.getStop());
        } else {
            throw new StingException("LocusReferenceView currently only supports multiple intervals on the same contig.");
        }
    } else {
        bounds = null;
    }
}
/**
* Initialize reference sequence data using the given locus.
* @param locus

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Arrays;
import java.util.Collection;
@ -111,7 +112,8 @@ public abstract class LocusView extends LocusIterator implements View {
* @return True if another locus context is bounded by this shard.
*/
protected boolean hasNextLocus() {
return nextLocus != null && (shard.getGenomeLoc() == null || !nextLocus.getLocation().isPast(shard.getGenomeLoc()));
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
return nextLocus != null && (lastLocus == null || !nextLocus.getLocation().isPast(lastLocus));
}
/**
@ -120,7 +122,9 @@ public abstract class LocusView extends LocusIterator implements View {
* @throw NoSuchElementException if the next element is missing.
*/
protected AlignmentContext nextLocus() {
if( nextLocus == null || (shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc())) )
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
if( nextLocus == null || (lastLocus != null && nextLocus.getLocation().isPast(lastLocus)) )
throw new NoSuchElementException("No more elements remain in locus context queue.");
// Cache the current and apply filtering.
@ -131,7 +135,7 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next();
if( sourceInfo.getDownsampleToCoverage() != null )
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
if( shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc()) )
if( lastLocus != null && nextLocus.getLocation().isPast(lastLocus) )
nextLocus = null;
}
else
@ -152,13 +156,13 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next();
// If the location of this shard is available, trim the data stream to match the shard.
if(shard.getGenomeLoc() != null) {
if(!shard.getGenomeLocs().isEmpty()) {
// Iterate past cruft at the beginning to the first locus in the shard.
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() )
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) && loci.hasNext() )
nextLocus = loci.next();
// If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) )
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) )
nextLocus = null;
}
}

View File

@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
* User: hanna
* Date: May 21, 2009
@ -38,9 +37,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
GenomeLoc lastLoc = null;
RODRecordList<ReferenceOrderedDatum> interval = null;
// broken support for multi-locus rods
//List<ReferenceOrderedDatum> multiLocusRODs = new LinkedList<ReferenceOrderedDatum>();
/**
* The data sources along with their current states.
*/
@ -61,18 +57,17 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
public RodLocusView( ShardDataProvider provider ) {
super(provider);
GenomeLoc loc = provider.getShard().getGenomeLoc();
GenomeLoc firstLoc = provider.getShard().getGenomeLocs().get(0);
List< Iterator<RODRecordList<ReferenceOrderedDatum>> > iterators = new LinkedList< Iterator<RODRecordList<ReferenceOrderedDatum>> >();
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) {
if ( DEBUG ) System.out.printf("Shard is %s%n", loc);
if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getShard().getGenomeLocs());
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
// next() to return the first real ROD in this shard
SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard());
GenomeLoc shardLoc = provider.getShard().getGenomeLoc();
it.seekForward(GenomeLocParser.createGenomeLoc(shardLoc.getContigIndex(), shardLoc.getStart()-1, shardLoc.getStart()-1));
it.seekForward(GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(), firstLoc.getStart()-1));
states.add(new ReferenceOrderedDataState(dataSource,it));
@ -99,7 +94,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( ! rodQueue.hasNext() )
return false;
else {
return ! rodQueue.peekLocation().isPast(shard.getGenomeLoc());
GenomeLoc lastLocus = shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1);
return ! rodQueue.peekLocation().isPast(lastLocus);
}
}
@ -148,11 +144,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return t;
}
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(ReferenceOrderedDatum marker) {
RODRecordList<ReferenceOrderedDatum> wrapper = new RODRecordList<ReferenceOrderedDatum>(marker.getName(),Collections.singletonList(marker),marker.getLocation());
return rodQueue.allElementsLTE(wrapper);
}
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(RODRecordList<ReferenceOrderedDatum> marker) {
return rodQueue.allElementsLTE(marker);
}
@ -173,7 +164,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( lastLoc == null ) {
// special case -- we're at the start
//System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc());
skippedBases = currentPos.getStart() - shard.getGenomeLoc().getStart();
GenomeLoc firstLoc = shard.getGenomeLocs().get(0);
skippedBases = currentPos.getStart() - firstLoc.getStart();
} else {
//System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc);
skippedBases = currentPos.minus(lastLoc) - 1;
@ -181,7 +173,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( skippedBases < -1 ) { // minus 1 value is ok
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
skippedBases, currentPos, lastLoc, shard.getGenomeLoc()));
skippedBases, currentPos, lastLoc, shard.getGenomeLocs()));
}
return Math.max(skippedBases, 0);
}
@ -191,9 +183,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
* @return
*/
public GenomeLoc getLocOneBeyondShard() {
return GenomeLocParser.createGenomeLoc( shard.getGenomeLoc().getContigIndex(),
shard.getGenomeLoc().getStop()+1,
shard.getGenomeLoc().getStop()+1);
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
return GenomeLocParser.createGenomeLoc(lastLocus.getContigIndex(),lastLocus.getStop()+1);
}
/**

View File

@ -1,106 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * @author aaron
 * @version 1.0
 * @date Apr 6, 2009
 * <p/>
 * Class ExpGrowthLocusShardStrategy
 * <p/>
 * A locus shard strategy whose requested shard size grows with every request: each call to
 * nextShardSize() returns the base size raised to the next higher integer power.
 */
public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
    /** Base of the power computed by nextShardSize(); replaced by every constructor and by adjustNextShardSize(). */
    private long baseSize = 100000;

    /** Exponent used by the most recent nextShardSize() call; zero until the first call. */
    private long currentExp = 0;

    /**
     * Builds a strategy from a sequence dictionary, an initial size and a limiting count.
     *
     * @param dic          the seq dictionary, passed to the superclass
     * @param startSize    the value used as the base size
     * @param limitByCount the value stored as the limiting factor
     */
    ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
        super(dic);
        limitingFactor = limitByCount;
        baseSize = startSize;
        currentExp = 0;
    }

    /**
     * Builds a strategy that continues from another locus shard strategy.
     * The base size is taken from a call to strat.nextShardSize(), so whatever state change
     * that call makes on strat also happens here.
     *
     * @param strat the shatter to convert from
     */
    ExpGrowthLocusShardStrategy(LocusShardStrategy strat) {
        super(strat);
        baseSize = strat.nextShardSize();
        currentExp = 0;
    }

    /**
     * Builds a strategy from a sequence dictionary, an initial size, a set of locations and a limiting count.
     *
     * @param dic          the reference dictionary, passed to the superclass
     * @param startSize    the value used as the base size
     * @param lst          locations to iterate from, passed to the superclass
     * @param limitByCount the value stored as the limiting factor
     */
    ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
        super(dic, lst);
        limitingFactor = limitByCount;
        baseSize = startSize;
        currentExp = 0;
    }

    /**
     * Restarts the growth sequence from a new base size.
     *
     * @param size the new base size; the exponent is reset to zero
     */
    public void adjustNextShardSize(long size) {
        currentExp = 0;
        baseSize = size;
    }

    /**
     * Advances the exponent by one and computes the corresponding shard size.
     *
     * @return floor(baseSize ^ currentExp) after the increment, so the first call yields baseSize itself
     */
    protected long nextShardSize() {
        // The exponent is bumped before use: the sequence is base^1, base^2, ... and never base^0.
        currentExp += 1;
        final double grownSize = Math.pow(baseSize, currentExp);
        return (long) Math.floor(grownSize);
    }
}

View File

@ -0,0 +1,87 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import net.sf.samtools.Chunk;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * A locus shard that carries, next to its genomic intervals, the list of file chunks
 * to read when fetching data for those intervals.
 */
public class IndexDelimitedLocusShard implements Shard {
    /**
     * The chunks associated with this shard.
     */
    private final List<Chunk> chunks;

    /**
     * The genomic locations to iterate over.
     */
    private final GenomeLocSortedSet intervals;

    /**
     * Creates a shard over the given intervals and chunks. Both arguments are stored as given,
     * without copying.
     * @param intervals the genomic locations covered by this shard.
     * @param chunks the chunks associated with those locations.
     */
    IndexDelimitedLocusShard(GenomeLocSortedSet intervals, List<Chunk> chunks) {
        this.chunks = chunks;
        this.intervals = intervals;
    }

    /**
     * The locations represented by this shard.
     * @return the shard's interval set converted to a list via toList().
     */
    public List<GenomeLoc> getGenomeLocs() {
        final List<GenomeLoc> asList = this.intervals.toList();
        return asList;
    }

    /**
     * Gets the chunks associated with this locus shard.
     * @return the chunk list passed at construction (the same list instance, not a copy).
     */
    public List<Chunk> getChunks() {
        return this.chunks;
    }

    /**
     * Returns the type of shard, which is always LOCUS_INTERVAL.
     * @return LOCUS_INTERVAL, indicating the shard type.
     */
    public ShardType getShardType() {
        return ShardType.LOCUS_INTERVAL;
    }

    /**
     * String representation of this shard.
     * @return the string form of the shard's interval set.
     */
    @Override
    public String toString() {
        return this.intervals.toString();
    }
}

View File

@ -0,0 +1,97 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
import java.util.*;
import net.sf.samtools.Chunk;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * A sharding strategy for loci based on reading of the index. Every requested interval
 * becomes one shard, paired with the file chunks the data source reports as overlapping it.
 */
public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
    /** Intervals not yet handed out, in sorted key order, each mapped to the chunks overlapping it. */
    private final SortedMap<GenomeLoc,List<Chunk>> locations = new TreeMap<GenomeLoc,List<Chunk>>();

    /**
     * Constructs the shard strategy from a data source and the intervals to shard over.
     * The overlapping chunks for every interval are looked up eagerly, at construction time.
     * @param dataSource Data source from which to load index data; must be a BlockDrivenSAMDataSource.
     * @param locations List of locations for which to load data.
     * @throws StingException if dataSource is not a BlockDrivenSAMDataSource (checked up front,
     *         even when no locations are supplied).
     */
    IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
        // Fail with a descriptive error rather than a bare ClassCastException from the downcast.
        if(!(dataSource instanceof BlockDrivenSAMDataSource))
            throw new StingException("IndexDelimitedLocusShardStrategy requires a BlockDrivenSAMDataSource; found: " +
                    (dataSource == null ? "null" : dataSource.getClass().getName()));
        BlockDrivenSAMDataSource blockDrivenDataSource = (BlockDrivenSAMDataSource)dataSource;

        for(GenomeLoc location: locations)
            this.locations.put(location,blockDrivenDataSource.getOverlappingFilePointers(location));
    }

    /**
     * returns true if there are additional shards
     *
     * @return false if we're done processing shards
     */
    public boolean hasNext() {
        return ( !locations.isEmpty() );
    }

    /**
     * gets the next Shard: the first remaining interval (in sorted order) together with its chunks.
     * The interval is removed from the pending set.
     *
     * @return the next shard
     * @throws StingException if no intervals remain.
     */
    public IndexDelimitedLocusShard next() {
        // The map itself is final and always initialised, so emptiness is the only case to guard.
        if (locations.isEmpty()) {
            throw new StingException("IndexDelimitedLocusShardStrategy: genomic regions list is empty in next() function.");
        }

        // get the first region in the list
        GenomeLoc loc = locations.firstKey();

        // remove() hands back the mapped chunks, so a separate get() is not needed.
        List<Chunk> filePointers = locations.remove(loc);
        return new IndexDelimitedLocusShard(GenomeLocSortedSet.createSetFromList(Arrays.asList(loc)),filePointers);
    }

    /** we don't support the remove command */
    public void remove() {
        throw new UnsupportedOperationException("ShardStrategies don't support remove()");
    }

    /**
     * makes the strategy iterable, i.e. usable in a for loop.
     *
     * @return this strategy itself; iterating consumes the pending intervals, so it can be traversed only once.
     */
    public Iterator<Shard> iterator() {
        return this;
    }
}

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collections;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
@ -50,8 +53,8 @@ public class IntervalShard implements Shard {
}
/** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() {
return mSet;
public List<GenomeLoc> getGenomeLocs() {
return Collections.singletonList(mSet);
}
/**

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collections;
import java.util.List;
/**
*
* User: aaron
@ -40,8 +43,8 @@ public class LocusShard implements Shard {
}
/** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() {
return mLoc;
public List<GenomeLoc> getGenomeLocs() {
return Collections.singletonList(mLoc);
}
/**

View File

@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collections;
import java.util.List;
/**
* A single, monolithic shard bridging all available data.
* @author mhanna
@ -28,8 +31,8 @@ public class MonolithicShard implements Shard {
* Returns null, indicating that (in this case) the entire genome is covered.
* @return null.
*/
public GenomeLoc getGenomeLoc() {
return null;
public List<GenomeLoc> getGenomeLocs() {
return Collections.emptyList();
}
/**

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/**
*
* User: aaron
@ -24,7 +26,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
*/
public abstract class ReadShard implements Shard {
/** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() {
public List<GenomeLoc> getGenomeLocs() {
throw new UnsupportedOperationException("ReadShard isn't genome loc aware");
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.Serializable;
import java.util.List;
/**
*
* User: aaron
@ -34,7 +35,7 @@ public interface Shard extends Serializable {
}
/** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc();
public List<GenomeLoc> getGenomeLocs();
/**
* what kind of shard do we return

View File

@ -38,17 +38,13 @@ import java.io.File;
public class ShardStrategyFactory {
public enum SHATTER_STRATEGY {
LINEAR,
EXPONENTIAL,
READS,
READS_EXPERIMENTAL,
INTERVAL,
MONOLITHIC // Put all of the available data into one shard.
MONOLITHIC, // Put all of the available data into one shard.
LOCUS_EXPERIMENTAL,
READS_EXPERIMENTAL
}
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
/**
* get a new shatter strategy
*
@ -75,14 +71,14 @@ public class ShardStrategyFactory {
switch (strat) {
case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, limitByCount);
case READS:
return new ReadDelimitedReadShardStrategy(startingSize, limitByCount);
case READS_EXPERIMENTAL:
return new BlockDelimitedReadShardStrategy(dataSource);
case INTERVAL:
throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option");
case LOCUS_EXPERIMENTAL:
throw new UnsupportedOperationException("Cannot do experimental locus sharding without intervals");
case READS_EXPERIMENTAL:
return new BlockDelimitedReadShardStrategy(dataSource);
default:
throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
}
@ -115,12 +111,12 @@ public class ShardStrategyFactory {
switch (strat) {
case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case INTERVAL:
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.LOCUS_INTERVAL);
case READS:
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.READ_INTERVAL);
case LOCUS_EXPERIMENTAL:
return new IndexDelimitedLocusShardStrategy(dataSource,lst);
case READS_EXPERIMENTAL:
throw new UnsupportedOperationException("Cannot do experimental read sharding with intervals");
default:

View File

@ -1,19 +1,17 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.BlockDelimitedReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.StingException;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader2;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
import java.util.Collection;
import java.util.List;
import java.io.File;
/**
@ -32,26 +30,46 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
/**
 * Creates a data source backed by a SAMFileReader2 over the first reads file of the given reads object.
 * A warning that experimental sharding is enabled is logged on every construction.
 * @param reads Description of the reads input; supplies the file list and the validation stringency.
 * @throws StingException if more than one reads file is supplied.
 */
public BlockDrivenSAMDataSource(Reads reads) {
super(reads);
logger.warn("Experimental sharding is enabled. Many use cases are not supported. Please use with care.");
// Multiple input files are rejected outright rather than merged.
if(reads.getReadsFiles().size() > 1)
throw new StingException("Experimental sharding strategy cannot handle multiple BAM files at this point.");
// NOTE(review): an empty file list passes the check above, so get(0) would presumably fail here -- confirm callers always supply a file.
File readsFile = reads.getReadsFiles().get(0);
reader = new SAMFileReader2(readsFile);
// Carry the validation stringency configured on the reads object over to the reader.
reader.setValidationStringency(reads.getValidationStringency());
}
/**
 * Reports whether the underlying reader has an index, as answered by the reader itself.
 * @return the value returned by the reader's hasIndex() call.
 */
public boolean hasIndex() {
return reader.hasIndex();
}
/**
 * Asks the reader for the file chunks overlapping the given location.
 * @param location the location whose contig, start and stop are forwarded to the reader.
 * @return the chunk list returned by the reader for that contig and coordinate range.
 */
public List<Chunk> getOverlappingFilePointers(GenomeLoc location) {
    final String contig = location.getContig();
    // The reader takes int coordinates, so start and stop are narrowed with explicit casts.
    final int start = (int) location.getStart();
    final int stop = (int) location.getStop();
    return reader.getOverlappingFilePointers(contig, start, stop);
}
public StingSAMIterator seek(Shard shard) {
if(!(shard instanceof BlockDelimitedReadShard))
throw new StingException("Currently unable to operate on types other than block delimited read shards.");
CloseableIterator<SAMRecord> iterator = reader.iterator(((BlockDelimitedReadShard)shard).getChunks());
return applyDecoratingIterators(true,
StingSAMIteratorAdapter.adapt(reads, iterator),
reads.getDownsamplingFraction(),
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
if(!(shard instanceof BlockDelimitedReadShard) && !(shard instanceof IndexDelimitedLocusShard))
throw new StingException("BlockDrivenSAMDataSource cannot operate on shards of type: " + shard);
if(shard instanceof ReadShard) {
CloseableIterator<SAMRecord> iterator = reader.iterator(((BlockDelimitedReadShard)shard).getChunks());
return applyDecoratingIterators(true,
StingSAMIteratorAdapter.adapt(reads, iterator),
reads.getDownsamplingFraction(),
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
}
else if(shard instanceof IndexDelimitedLocusShard) {
CloseableIterator<SAMRecord> iterator = reader.iterator(((IndexDelimitedLocusShard)shard).getChunks());
return applyDecoratingIterators(false,
StingSAMIteratorAdapter.adapt(reads, iterator),
reads.getDownsamplingFraction(),
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
}
throw new UnsupportedOperationException("Unable to infer type of this shard.");
}
/**

View File

@ -4,12 +4,7 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.picard.sam.SamFileHeaderMerger;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
import org.broadinstitute.sting.gatk.datasources.shards.ReadDelimitedReadShard;
@ -21,8 +16,8 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File;
import java.util.Collection;
import java.util.Collections;
/*
* Copyright (c) 2009 The Broad Institute
@ -178,9 +173,12 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
reads.getSupplementalFilters());
// add the new overlapping detection iterator, if we have a last interval and we're a read based shard
if(shard.getGenomeLocs().size() > 1)
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
if (mLastInterval != null && shard.getShardType() == Shard.ShardType.READ_INTERVAL )
iterator = new PlusOneFixIterator(shard.getGenomeLoc(),new IntervalOverlapIterator(iterator,mLastInterval,false));
mLastInterval = shard.getGenomeLoc();
iterator = new PlusOneFixIterator(shardGenomeLoc,new IntervalOverlapIterator(iterator,mLastInterval,false));
mLastInterval = shardGenomeLoc;
} else {
throw new StingException("seek: Unknown shard type");
@ -205,7 +203,12 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
if( getHeader().getSequenceDictionary().getSequences().size() == 0 )
throw new StingException("Unable to seek to the given locus; reads data source has no alignment information.");
return createIterator( new MappedStreamSegment(shard.getGenomeLoc()) );
if(shard.getGenomeLocs().size() > 1)
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
return createIterator( new MappedStreamSegment(Collections.singletonList(shardGenomeLoc)) );
}
/**
@ -231,11 +234,11 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
if (!intoUnmappedReads) {
if (lastReadPos == null) {
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
iter = createIterator(new MappedStreamSegment(lastReadPos));
iter = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
return InitialReadIterator(readShard.getSize(), iter);
} else {
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)))));
}
if (intoUnmappedReads && !includeUnmappedReads)
@ -335,7 +338,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
readsTaken = readCount;
readsSeenAtLastPos = 0;
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos));
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
}
}

View File

@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data.
*/
public Iterator seek( Shard shard ) {
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLocs()) );
return iterator;
}

View File

@ -170,8 +170,10 @@ class EntireStream implements DataStreamSegment {
*/
class MappedStreamSegment implements DataStreamSegment {
public final GenomeLoc locus;
public MappedStreamSegment( GenomeLoc locus ) {
this.locus = locus;
public MappedStreamSegment( List<GenomeLoc> loci ) {
if(loci.size() > 1)
throw new StingException("MappedStreamSegments cannot apply to a range of loci");
this.locus = !loci.isEmpty() ? loci.get(0) : null;
}
}

View File

@ -1,18 +1,14 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File;

View File

@ -134,7 +134,10 @@ public abstract class Accumulator {
* Create a holder for interval results if none exists. Add the result to the holder.
*/
public void accumulate( Shard shard, Object result ) {
intervalAccumulator.add( new Pair<GenomeLoc,Object>( shard.getGenomeLoc(), result ) );
// TODO: The following code is actually wrong; we'll be doubly assigning results to locations.
// Fix before the new sharding system comes online.
for(GenomeLoc genomeLoc: shard.getGenomeLocs())
intervalAccumulator.add( new Pair<GenomeLoc,Object>( genomeLoc, result ) );
}
/**

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.NoSuchElementException;
import java.util.Iterator;
import java.util.List;
/**
* User: hanna
* Date: May 12, 2009
@ -23,24 +24,28 @@ import java.util.Iterator;
*/
public class GenomeLocusIterator implements Iterator<GenomeLoc> {
/**
* The entire region over which we're iterating.
* An iterator to the entire data structure over which we're iterating.
*/
private GenomeLoc completeLocus;
private final Iterator<GenomeLoc> locusIterator;
/**
* The current position in the traversal.
* The (possibly multi-base-pair) locus currently being traversed.
*/
private GenomeLoc currentLocus;
private GenomeLoc currentLocus = null;
/**
* The 1 base pair long location.
*/
private GenomeLoc currentLocation = null;
/**
* Creates an iterator that can traverse over the entire
* reference specified in the given ShardDataProvider.
* @param completeLocus Data provider to use as a backing source.
* Provider must have a reference (hasReference() == true).
* @param loci the list of loci over which to iterate.
*/
public GenomeLocusIterator( GenomeLoc completeLocus ) {
this.completeLocus = completeLocus;
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
public GenomeLocusIterator( List<GenomeLoc> loci ) {
this.locusIterator = loci.iterator();
seedNextLocus();
}
/**
@ -48,7 +53,7 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
* @return True if the iterator has more elements. False otherwise.
*/
public boolean hasNext() {
return !currentLocus.isPast(completeLocus);
return currentLocation != null;
}
/**
@ -58,12 +63,29 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
public GenomeLoc next() {
if( !hasNext() )
throw new NoSuchElementException("No elements remaining in bounded reference region.");
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
currentLocus = GenomeLocParser.incPos(currentLocus);
GenomeLoc toReturn = currentLocation.clone();
seedNextLocus();
return toReturn;
}
/**
 * Removal is not supported by this iterator.
 * @throws UnsupportedOperationException always.
 */
public void remove() {
    // Name the class that actually throws; the previous message referred to "ReferenceLocusIterator".
    throw new UnsupportedOperationException( "GenomeLocusIterator is read-only" );
}
/**
 * Move currentLocation forward by one position.  When no position is active yet, or the
 * advance steps past the end of currentLocus, restart from the contig and start of the next
 * locus supplied by locusIterator.  Both currentLocus and currentLocation are left null
 * once locusIterator has nothing further to offer.
 */
private void seedNextLocus() {
    final boolean positioned = currentLocus != null && currentLocation != null;
    if(positioned) {
        currentLocation = GenomeLocParser.incPos(currentLocation);
        // Still inside the active locus: the new position is valid as-is.
        if(currentLocation != null && !currentLocation.isPast(currentLocus))
            return;
    }

    // Either initializing or the location was pushed off the current locus; reinitialize using the next locus.
    currentLocus = null;
    currentLocation = null;
    if(!locusIterator.hasNext())
        return;

    currentLocus = locusIterator.next();
    currentLocation = GenomeLocParser.createGenomeLoc(currentLocus.getContig(),currentLocus.getStart());
}
}

View File

@ -23,8 +23,6 @@ import java.util.ArrayList;
*/
public class TraverseLoci extends TraversalEngine {
final private static String LOCI_STRING = "sites";
//final private static boolean ENABLE_ROD_TRAVERSAL = false;
/**
* our log, which we want to capture anything from this class
@ -52,14 +50,11 @@ public class TraverseLoci extends TraversalEngine {
LocusView locusView = getLocusView( walker, dataProvider );
//if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA )
// throw new RuntimeException("Engine currently doesn't support RodWalkers");
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
ReferenceOrderedView referenceOrderedDataView = null;
if ( /* ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers || */ WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
else
referenceOrderedDataView = (RodLocusView)locusView;
@ -110,7 +105,7 @@ public class TraverseLoci extends TraversalEngine {
// We have a final map call to execute here to clean up the skipped based from the
// last position in the ROD to that in the interval
if ( /* GenomeAnalysisEngine.instance.getArguments().enableRodWalkers && */ WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
RodLocusView rodLocusView = (RodLocusView)locusView;
long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) {

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
import java.util.List;
@ -36,7 +37,9 @@ public class TraverseLocusWindows extends TraversalEngine {
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
GenomeLoc interval = shard.getGenomeLoc();
if(shard.getGenomeLocs().size() > 1)
throw new StingException("This traversal does not support multiple intervals within a single shard");
GenomeLoc interval = shard.getGenomeLocs().get(0);
ReadView readView = new ReadView( dataProvider );
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );

View File

@ -75,8 +75,12 @@ public class ResourceBundleExtractorDoclet {
resourceText.store(out,"Strings displayed by the Sting help system");
// ASCII codes for making text blink
final String blink = "\u001B\u005B\u0035\u006D";
final String reset = "\u001B\u005B\u006D";
if(undocumentedWalkers.size() > 0)
Utils.warnUser("The following walkers are currently undocumented: " + Utils.join(" ",undocumentedWalkers));
Utils.warnUser(String.format("The following walkers are currently undocumented: %s%s%s", blink, Utils.join(" ",undocumentedWalkers), reset));
return true;
}

View File

@ -33,13 +33,18 @@ public class AllLocusViewTest extends LocusViewTemplate {
/**
* Test the reads according to an independently derived context.
* @param view
* @param bounds
* @param range
* @param reads
*/
@Override
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) {
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
AllLocusView allLocusView = (AllLocusView)view;
// TODO: Should skip over loci not in the given range.
GenomeLoc firstLoc = range.get(0);
GenomeLoc lastLoc = range.get(range.size()-1);
GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop());
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
AlignmentContext locusContext = allLocusView.next();

View File

@ -36,13 +36,18 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
/**
* Test the reads according to an independently derived context.
* @param view
* @param bounds
* @param range
* @param reads
*/
@Override
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) {
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
// TODO: Should skip over loci not in the given range.
GenomeLoc firstLoc = range.get(0);
GenomeLoc lastLoc = range.get(range.size()-1);
GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop());
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);

View File

@ -87,7 +87,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
*/
protected void validateLocation( GenomeLoc loc ) {
Shard shard = new LocusShard(loc);
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLocs());
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
@ -99,7 +99,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
char actual = view.getReferenceContext(locus).getBase();
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()),
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLocs()),
expected,
actual);
}

View File

@ -50,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList());
testReadsInContext(view, shard.getGenomeLocs(), Collections.<SAMRecord>emptyList());
}
@Test
@ -64,7 +64,7 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
@ -88,7 +88,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
@ -100,7 +100,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
@ -112,7 +112,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
@ -124,7 +124,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
@ -139,7 +139,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
@ -156,7 +156,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
@ -173,7 +173,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
@ -192,7 +192,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
@ -219,7 +219,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06,
read07, read08, read09, read10, read11, read12);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
/**
@ -236,7 +236,7 @@ public abstract class LocusViewTemplate extends BaseTest {
* @param bounds
* @param reads
*/
protected abstract void testReadsInContext(LocusView view, GenomeLoc bounds, List<SAMRecord> reads);
protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<SAMRecord> reads);
/**
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
@ -256,7 +256,7 @@ public abstract class LocusViewTemplate extends BaseTest {
}
public void reset() {
return; // TODO MATT FIX ME
return;
}
};
}

View File

@ -112,8 +112,9 @@ public class IntervalShardStrategyTest extends BaseTest {
int counter = 0;
while (strat.hasNext()) {
Shard d = strat.next();
assertEquals(1, d.getGenomeLoc().getStart());
assertEquals(1000, d.getGenomeLoc().getStop());
assertEquals(1,d.getGenomeLocs().size());
assertEquals(1, d.getGenomeLocs().get(0).getStart());
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
counter++;
}
assertEquals(5, counter);
@ -130,7 +131,8 @@ public class IntervalShardStrategyTest extends BaseTest {
int counter = 0;
while (strat.hasNext()) {
Shard d = strat.next();
assertEquals(1000, d.getGenomeLoc().getStop());
assertEquals(1,d.getGenomeLocs().size());
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
counter++;
}
assertEquals(5, counter);

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader;
@ -60,14 +61,16 @@ public class IntervalShardTest extends BaseTest {
public void simpleReturn() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
assertTrue(intervalShard.getGenomeLoc().equals(loc));
assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
assertTrue(intervalShard.getGenomeLocs().get(0).equals(loc));
}
@Test
public void ensureNotReference() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
assertTrue(intervalShard.getGenomeLocs().get(0) != loc && intervalShard.getGenomeLocs().get(0).equals(loc));
}
}

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.BaseTest;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader;
@ -62,7 +63,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 499);
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 499);
++counter;
}
assertTrue(counter == 10);
@ -76,7 +78,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 999);
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 999);
++counter;
}
assertTrue(counter == 5);
@ -90,10 +93,11 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
if (counter % 2 == 0) {
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 599);
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 599);
} else {
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 399);
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 399);
}
++counter;
}
@ -108,7 +112,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
assertTrue((d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart()) == 199);
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
assertTrue((d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart()) == 199);
++counter;
}
assertTrue(counter == 1);

View File

@ -67,20 +67,6 @@ public class ShardStrategyFactoryTest extends BaseTest {
assertTrue(st instanceof LinearLocusShardStrategy);
}
@Test
public void testExpNonInterval() {
ShardStrategy st = ShardStrategyFactory.shatter(null,ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100);
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
}
@Test
public void testExpInterval() {
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(null,ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100,set);
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
}
@Test
public void testLinearInterval() {
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Collections;
/**
* User: hanna
* Date: May 21, 2009
@ -54,7 +55,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -75,10 +76,10 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
// Create a new iterator at position 2.
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite2)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -125,7 +126,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -139,7 +140,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
iteratorPool.release(iterator);
// Create another iterator after the current iterator.
iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
// Make sure that the previously acquired iterator was reused.
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
@ -160,7 +161,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test
public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -174,7 +175,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
iteratorPool.release(iterator);
// Create another iterator after the current iterator.
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
iterator = iteratorPool.iterator(new MappedStreamSegment(Collections.singletonList(testSite1)) );
// Make sure that the previously acquired iterator was reused.
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.After;
import org.junit.Before;
@ -96,7 +97,8 @@ public class SAMBAMDataSourceTest extends BaseTest {
int readCount = 0;
count++;
logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig());
GenomeLoc firstLocus = sh.getGenomeLocs().get(0), lastLocus = sh.getGenomeLocs().get(sh.getGenomeLocs().size()-1);
logger.debug("Start : " + firstLocus.getStart() + " stop : " + lastLocus.getStop() + " contig " + firstLocus.getContig());
logger.debug("count = " + count);
StingSAMIterator datum = data.seek(sh);