Basic support for very simple index-driven locus traversals. The Shard interface has been changed to
support batched intervals in a single shard, but intervals are not yet compressed into a single
shard.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2730 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-01-29 03:14:26 +00:00
parent 4810e9c9cd
commit 3d922a019f
36 changed files with 439 additions and 266 deletions

View File

@ -184,6 +184,19 @@ class BAMFileReader2
return mCurrentIterator; return mCurrentIterator;
} }
/**
 * Finds the chunks of this BAM file that may hold reads overlapping the given region.
 * The sequence name is resolved to a reference index through the file header; when the
 * header does not know the sequence (index of -1), the index is not consulted and
 * Chunk.toChunkList is handed a null array.
 * @param sequence Name of the reference sequence to look up in the file header.
 * @param start Start of the region, passed unchanged to the index's getSearchBins.
 * @param end End of the region, passed unchanged to the index's getSearchBins.
 * @return The result of Chunk.toChunkList applied to the file pointers found (or to null).
 */
public List<Chunk> getOverlappingFilePointers(final String sequence, final int start, final int end) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    // Only consult the index for sequences that the header actually knows about.
    final long[] searchBins = (sequenceIndex == -1)
            ? null
            : getFileIndex().getSearchBins(sequenceIndex, start, end);
    return Chunk.toChunkList(searchBins);
}
/** /**
* Prepare to iterate through the SAMRecords that match the given interval. * Prepare to iterate through the SAMRecords that match the given interval.
* Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed

View File

@ -199,9 +199,16 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
*/ */
public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) { public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) {
// TODO: Add sanity checks so that we're not doing this against a BAM file. // TODO: Add sanity checks so that we're not doing this against a BAM file.
if(!(mReader instanceof ReaderImplementation2)) if(!(mReader instanceof BAMFileReader2))
throw new PicardException("This call requires a ReaderImplementation2-compliant interface"); throw new PicardException("This call requires a ReaderImplementation2-compliant interface");
return ((ReaderImplementation2)mReader).getIterator(chunks); return ((BAMFileReader2)mReader).getIterator(chunks);
}
/**
 * Looks up the chunks overlapping the given region by delegating to the underlying reader.
 * @param sequence Name of the sequence, passed through unchanged to the underlying reader.
 * @param start Start of the region, passed through unchanged to the underlying reader.
 * @param end End of the region, passed through unchanged to the underlying reader.
 * @return Whatever BAMFileReader2.getOverlappingFilePointers returns for these arguments.
 * @throws PicardException if the underlying reader is not a BAMFileReader2.
 */
public List<Chunk> getOverlappingFilePointers(final String sequence, final int start, final int end) {
// TODO: Add sanity checks so that we're not doing this against a BAM file.
// NOTE(review): the TODO above looks copied from iterator(List<Chunk>); the check below
// *requires* a BAMFileReader2, and the message still names ReaderImplementation2 -- confirm wording.
if(!(mReader instanceof BAMFileReader2))
throw new PicardException("This call requires a ReaderImplementation2-compliant interface");
// The instanceof check above makes this cast safe.
return ((BAMFileReader2)mReader).getOverlappingFilePointers(sequence,start,end);
} }
/** /**

View File

@ -609,9 +609,8 @@ public class GenomeAnalysisEngine {
shardType = (walker.isReduceByInterval()) ? shardType = (walker.isReduceByInterval()) ?
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL : ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR; ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
shardStrategy = ShardStrategyFactory.shatter(readsDataSource, shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
shardType, argCollection.experimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : shardType,
drivingDataSource.getSequenceDictionary(), drivingDataSource.getSequenceDictionary(),
SHARD_SIZE, SHARD_SIZE,
intervals, maxIterations); intervals, maxIterations);

View File

@ -46,7 +46,7 @@ public class AllLocusView extends LocusView {
public AllLocusView(ShardDataProvider provider) { public AllLocusView(ShardDataProvider provider) {
super( provider ); super( provider );
// Seed the state tracking members with the first possible seek position and the first possible locus context. // Seed the state tracking members with the first possible seek position and the first possible locus context.
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLoc() ); locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLocs() );
if( locusIterator.hasNext() ) { if( locusIterator.hasNext() ) {
nextPosition = locusIterator.next(); nextPosition = locusIterator.next();
nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition); nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition);

View File

@ -9,6 +9,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import java.util.List;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
* *
@ -69,7 +71,7 @@ public class LocusReferenceView extends ReferenceView {
*/ */
public LocusReferenceView( ShardDataProvider provider ) { public LocusReferenceView( ShardDataProvider provider ) {
super(provider); super(provider);
bounds = provider.getShard().getGenomeLoc(); initializeBounds(provider);
windowStart = windowStop = 0; windowStart = windowStop = 0;
initializeReferenceSequence(bounds); initializeReferenceSequence(bounds);
} }
@ -80,7 +82,7 @@ public class LocusReferenceView extends ReferenceView {
*/ */
public LocusReferenceView( Walker walker, ShardDataProvider provider ) { public LocusReferenceView( Walker walker, ShardDataProvider provider ) {
super( provider ); super( provider );
bounds = provider.getShard().getGenomeLoc(); initializeBounds(provider);
// Retrieve information about the window being accessed. // Retrieve information about the window being accessed.
if( walker.getClass().isAnnotationPresent(Reference.class) ) { if( walker.getClass().isAnnotationPresent(Reference.class) ) {
@ -131,6 +133,22 @@ public class LocusReferenceView extends ReferenceView {
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
} }
/**
 * Computes the bounds of this view from the loci of the provider's shard.
 * With no loci, bounds is set to null. Otherwise bounds runs from the start of the
 * first locus to the stop of the last locus, on the contig of the first locus.
 * @param provider Data provider whose shard supplies the list of loci.
 * @throws StingException if the first and last loci lie on different contigs.
 */
private void initializeBounds(ShardDataProvider provider) {
    final List<GenomeLoc> shardLoci = provider.getShard().getGenomeLocs();
    if(!shardLoci.isEmpty()) {
        final GenomeLoc head = shardLoci.get(0);
        final GenomeLoc tail = shardLoci.get(shardLoci.size()-1);
        // Only the two end points are compared; everything between them is taken to be on the same contig.
        if(head.getContigIndex() != tail.getContigIndex())
            throw new StingException("LocusReferenceView currently only supports multiple intervals on the same contig.");
        bounds = GenomeLocParser.createGenomeLoc(head.getContig(),head.getStart(),tail.getStop());
    }
    else {
        // A shard without loci has no bounds.
        bounds = null;
    }
}
/** /**
* Initialize reference sequence data using the given locus. * Initialize reference sequence data using the given locus.
* @param locus * @param locus

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
@ -111,7 +112,8 @@ public abstract class LocusView extends LocusIterator implements View {
* @return True if another locus context is bounded by this shard. * @return True if another locus context is bounded by this shard.
*/ */
protected boolean hasNextLocus() { protected boolean hasNextLocus() {
return nextLocus != null && (shard.getGenomeLoc() == null || !nextLocus.getLocation().isPast(shard.getGenomeLoc())); GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
return nextLocus != null && (lastLocus == null || !nextLocus.getLocation().isPast(lastLocus));
} }
/** /**
@ -120,7 +122,9 @@ public abstract class LocusView extends LocusIterator implements View {
* @throw NoSuchElementException if the next element is missing. * @throw NoSuchElementException if the next element is missing.
*/ */
protected AlignmentContext nextLocus() { protected AlignmentContext nextLocus() {
if( nextLocus == null || (shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc())) ) GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
if( nextLocus == null || (lastLocus != null && nextLocus.getLocation().isPast(lastLocus)) )
throw new NoSuchElementException("No more elements remain in locus context queue."); throw new NoSuchElementException("No more elements remain in locus context queue.");
// Cache the current and apply filtering. // Cache the current and apply filtering.
@ -131,7 +135,7 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next(); nextLocus = loci.next();
if( sourceInfo.getDownsampleToCoverage() != null ) if( sourceInfo.getDownsampleToCoverage() != null )
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
if( shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc()) ) if( lastLocus != null && nextLocus.getLocation().isPast(lastLocus) )
nextLocus = null; nextLocus = null;
} }
else else
@ -152,13 +156,13 @@ public abstract class LocusView extends LocusIterator implements View {
nextLocus = loci.next(); nextLocus = loci.next();
// If the location of this shard is available, trim the data stream to match the shard. // If the location of this shard is available, trim the data stream to match the shard.
if(shard.getGenomeLoc() != null) { if(!shard.getGenomeLocs().isEmpty()) {
// Iterate past cruft at the beginning to the first locus in the shard. // Iterate past cruft at the beginning to the first locus in the shard.
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() ) while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) && loci.hasNext() )
nextLocus = loci.next(); nextLocus = loci.next();
// If nothing in the shard was found, indicate that by setting nextAlignmentContext to null. // If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) ) if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) )
nextLocus = null; nextLocus = null;
} }
} }

View File

@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*; import java.util.*;
import net.sf.samtools.SAMRecord;
/** /**
* User: hanna * User: hanna
* Date: May 21, 2009 * Date: May 21, 2009
@ -38,9 +37,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
GenomeLoc lastLoc = null; GenomeLoc lastLoc = null;
RODRecordList<ReferenceOrderedDatum> interval = null; RODRecordList<ReferenceOrderedDatum> interval = null;
// broken support for multi-locus rods
//List<ReferenceOrderedDatum> multiLocusRODs = new LinkedList<ReferenceOrderedDatum>();
/** /**
* The data sources along with their current states. * The data sources along with their current states.
*/ */
@ -61,18 +57,17 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
public RodLocusView( ShardDataProvider provider ) { public RodLocusView( ShardDataProvider provider ) {
super(provider); super(provider);
GenomeLoc loc = provider.getShard().getGenomeLoc(); GenomeLoc firstLoc = provider.getShard().getGenomeLocs().get(0);
List< Iterator<RODRecordList<ReferenceOrderedDatum>> > iterators = new LinkedList< Iterator<RODRecordList<ReferenceOrderedDatum>> >(); List< Iterator<RODRecordList<ReferenceOrderedDatum>> > iterators = new LinkedList< Iterator<RODRecordList<ReferenceOrderedDatum>> >();
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) { for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) {
if ( DEBUG ) System.out.printf("Shard is %s%n", loc); if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getShard().getGenomeLocs());
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding // grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
// the iterator to immediately before it, so that it can be added to the merging iterator primed for // the iterator to immediately before it, so that it can be added to the merging iterator primed for
// next() to return the first real ROD in this shard // next() to return the first real ROD in this shard
SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard()); SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard());
GenomeLoc shardLoc = provider.getShard().getGenomeLoc(); it.seekForward(GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(), firstLoc.getStart()-1));
it.seekForward(GenomeLocParser.createGenomeLoc(shardLoc.getContigIndex(), shardLoc.getStart()-1, shardLoc.getStart()-1));
states.add(new ReferenceOrderedDataState(dataSource,it)); states.add(new ReferenceOrderedDataState(dataSource,it));
@ -99,7 +94,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( ! rodQueue.hasNext() ) if ( ! rodQueue.hasNext() )
return false; return false;
else { else {
return ! rodQueue.peekLocation().isPast(shard.getGenomeLoc()); GenomeLoc lastLocus = shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1);
return ! rodQueue.peekLocation().isPast(lastLocus);
} }
} }
@ -148,11 +144,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return t; return t;
} }
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(ReferenceOrderedDatum marker) {
RODRecordList<ReferenceOrderedDatum> wrapper = new RODRecordList<ReferenceOrderedDatum>(marker.getName(),Collections.singletonList(marker),marker.getLocation());
return rodQueue.allElementsLTE(wrapper);
}
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(RODRecordList<ReferenceOrderedDatum> marker) { private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(RODRecordList<ReferenceOrderedDatum> marker) {
return rodQueue.allElementsLTE(marker); return rodQueue.allElementsLTE(marker);
} }
@ -173,7 +164,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( lastLoc == null ) { if ( lastLoc == null ) {
// special case -- we're at the start // special case -- we're at the start
//System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc()); //System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc());
skippedBases = currentPos.getStart() - shard.getGenomeLoc().getStart(); GenomeLoc firstLoc = shard.getGenomeLocs().get(0);
skippedBases = currentPos.getStart() - firstLoc.getStart();
} else { } else {
//System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc); //System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc);
skippedBases = currentPos.minus(lastLoc) - 1; skippedBases = currentPos.minus(lastLoc) - 1;
@ -181,7 +173,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( skippedBases < -1 ) { // minus 1 value is ok if ( skippedBases < -1 ) { // minus 1 value is ok
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s", throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
skippedBases, currentPos, lastLoc, shard.getGenomeLoc())); skippedBases, currentPos, lastLoc, shard.getGenomeLocs()));
} }
return Math.max(skippedBases, 0); return Math.max(skippedBases, 0);
} }
@ -191,9 +183,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
* @return * @return
*/ */
public GenomeLoc getLocOneBeyondShard() { public GenomeLoc getLocOneBeyondShard() {
return GenomeLocParser.createGenomeLoc( shard.getGenomeLoc().getContigIndex(), GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
shard.getGenomeLoc().getStop()+1, return GenomeLocParser.createGenomeLoc(lastLocus.getContigIndex(),lastLocus.getStop()+1);
shard.getGenomeLoc().getStop()+1);
} }
/** /**

View File

@ -1,106 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * @author aaron
 * @version 1.0
 * @date Apr 6, 2009
 * <p/>
 * Class ExpGrowthLocusShardStrategy
 * <p/>
 * A locus shard strategy whose shard sizes grow exponentially: every request for the
 * next shard size raises the base size to a power one higher than the previous request.
 */
public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
    /** Base of the exponential growth. */
    private long baseSize = 100000;

    /** Exponent used by the most recent call to nextShardSize(); 0 before the first call. */
    private long currentExp = 0;

    /**
     * Builds a strategy from a sequence dictionary.
     *
     * @param dic          the seq dictionary, handed to the superclass
     * @param startSize    the base size from which growth starts
     * @param limitByCount value stored in the inherited limitingFactor
     */
    ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
        super(dic);
        limitingFactor = limitByCount;
        restartGrowth(startSize);
    }

    /**
     * Builds a strategy that takes over from another locus shard strategy; the base size
     * is whatever the other strategy reports as its next shard size.
     *
     * @param strat the shatter to convert from
     */
    ExpGrowthLocusShardStrategy(LocusShardStrategy strat) {
        super(strat);
        restartGrowth(strat.nextShardSize());
    }

    /**
     * Builds a strategy for a list of genomic locations.
     *
     * @param dic          the reference dictionary, handed to the superclass
     * @param startSize    the base size from which growth starts
     * @param lst          locations to iterate from, handed to the superclass
     * @param limitByCount value stored in the inherited limitingFactor
     */
    ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
        super(dic, lst);
        limitingFactor = limitByCount;
        restartGrowth(startSize);
    }

    /**
     * Replaces the base size and restarts the exponential growth from it.
     *
     * @param size adjust the next size to this
     */
    public void adjustNextShardSize(long size) {
        restartGrowth(size);
    }

    /**
     * Advances the exponent by one and reports the base size raised to that exponent.
     * The first call after a reset therefore uses an exponent of 1.
     *
     * @return the next shard size
     */
    protected long nextShardSize() {
        currentExp += 1;
        final double grown = Math.pow(baseSize, currentExp);
        return (long) Math.floor(grown);
    }

    /** Stores a new base size and rewinds the exponent to zero. */
    private void restartGrowth(long size) {
        baseSize = size;
        currentExp = 0;
    }
}

View File

@ -0,0 +1,87 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import net.sf.samtools.Chunk;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* A shard that's delimited based on the index rather than
*/
public class IndexDelimitedLocusShard implements Shard {
/**
* a collection of genomic locations to interate over
*/
private final GenomeLocSortedSet intervals;
/**
* A list of the chunks associated with this shard.
*/
private final List<Chunk> chunks;
IndexDelimitedLocusShard(GenomeLocSortedSet intervals, List<Chunk> chunks) {
this.intervals = intervals;
this.chunks = chunks;
}
/**
* The locations represented by this shard.
* @return the genome location represented by this shard
*/
public List<GenomeLoc> getGenomeLocs() {
return intervals.toList();
}
/**
* Gets the chunks associated with this locus shard.
* @return A list of the chunks to use when retrieving locus data.
*/
public List<Chunk> getChunks() {
return chunks;
}
/**
* returns the type of shard, LOCUS_INTERVAL.
* @return LOCUS_INTERVAL, indicating the shard type
*/
public ShardType getShardType() {
return ShardType.LOCUS_INTERVAL;
}
/**
* String representation of this shard.
* @return A string representation of the boundaries of this shard.
*/
@Override
public String toString() {
return intervals.toString();
}
}

View File

@ -0,0 +1,97 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
import java.util.*;
import net.sf.samtools.Chunk;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * A sharding strategy for loci based on reading of the index.
 * Each requested location is paired up front with the file pointers the data source
 * reports for it, and every call to next() hands out one location as its own shard.
 */
public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
    /** Locations still to be handed out, in key order, each mapped to the chunks found for it. */
    private final SortedMap<GenomeLoc,List<Chunk>> locations = new TreeMap<GenomeLoc,List<Chunk>>();

    /**
     * Constructs the shard strategy from a data source and a set of locations.
     * The overlapping file pointers of every location are looked up immediately.
     * @param dataSource Data source from which to load index data; cast to
     *                   BlockDrivenSAMDataSource for each location looked up.
     * @param locations List of locations for which to load data.
     */
    IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
        for(GenomeLoc location: locations)
            this.locations.put(location,((BlockDrivenSAMDataSource)dataSource).getOverlappingFilePointers(location));
    }

    /**
     * returns true if there are additional shards
     *
     * @return false if we're done processing shards
     */
    public boolean hasNext() {
        return !locations.isEmpty();
    }

    /**
     * Removes the first remaining location and wraps it, with its chunks, in a new shard.
     *
     * @return the next shard
     * @throws StingException if no locations remain
     */
    public IndexDelimitedLocusShard next() {
        // The map is final and created with the instance, so only emptiness needs checking.
        if (locations.isEmpty()) {
            throw new StingException("IndexDelimitedLocusShardStrategy: genomic regions list is empty in next() function.");
        }

        // Take the first region; remove() hands back the chunks stored for it.
        GenomeLoc loc = locations.firstKey();
        List<Chunk> filePointers = locations.remove(loc);
        return new IndexDelimitedLocusShard(GenomeLocSortedSet.createSetFromList(Arrays.asList(loc)),filePointers);
    }

    /** we don't support the remove command */
    public void remove() {
        throw new UnsupportedOperationException("ShardStrategies don't support remove()");
    }

    /**
     * Makes the strategy iterable, i.e. usable in a for loop. The strategy is its own
     * iterator, so shards consumed through one reference are gone for all others.
     *
     * @return this strategy
     */
    public Iterator<Shard> iterator() {
        return this;
    }
}

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collections;
import java.util.List;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -50,8 +53,8 @@ public class IntervalShard implements Shard {
} }
/** @return the genome location represented by this shard */ /** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() { public List<GenomeLoc> getGenomeLocs() {
return mSet; return Collections.singletonList(mSet);
} }
/** /**

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collections;
import java.util.List;
/** /**
* *
* User: aaron * User: aaron
@ -40,8 +43,8 @@ public class LocusShard implements Shard {
} }
/** @return the genome location represented by this shard */ /** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() { public List<GenomeLoc> getGenomeLocs() {
return mLoc; return Collections.singletonList(mLoc);
} }
/** /**

View File

@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collections;
import java.util.List;
/** /**
* A single, monolithic shard bridging all available data. * A single, monolithic shard bridging all available data.
* @author mhanna * @author mhanna
@ -28,8 +31,8 @@ public class MonolithicShard implements Shard {
* Returns null, indicating that (in this case) the entire genome is covered. * Returns null, indicating that (in this case) the entire genome is covered.
* @return null. * @return null.
*/ */
public GenomeLoc getGenomeLoc() { public List<GenomeLoc> getGenomeLocs() {
return null; return Collections.emptyList();
} }
/** /**

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/** /**
* *
* User: aaron * User: aaron
@ -24,7 +26,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
*/ */
public abstract class ReadShard implements Shard { public abstract class ReadShard implements Shard {
/** @return the genome location represented by this shard */ /** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() { public List<GenomeLoc> getGenomeLocs() {
throw new UnsupportedOperationException("ReadShard isn't genome loc aware"); throw new UnsupportedOperationException("ReadShard isn't genome loc aware");
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.Serializable; import java.io.Serializable;
import java.util.List;
/** /**
* *
* User: aaron * User: aaron
@ -34,7 +35,7 @@ public interface Shard extends Serializable {
} }
/** @return the genome location represented by this shard */ /** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc(); public List<GenomeLoc> getGenomeLocs();
/** /**
* what kind of shard do we return * what kind of shard do we return

View File

@ -38,17 +38,13 @@ import java.io.File;
public class ShardStrategyFactory { public class ShardStrategyFactory {
public enum SHATTER_STRATEGY { public enum SHATTER_STRATEGY {
LINEAR, LINEAR,
EXPONENTIAL,
READS, READS,
READS_EXPERIMENTAL,
INTERVAL, INTERVAL,
MONOLITHIC // Put all of the available data into one shard. MONOLITHIC, // Put all of the available data into one shard.
LOCUS_EXPERIMENTAL,
READS_EXPERIMENTAL
} }
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
/** /**
* get a new shatter strategy * get a new shatter strategy
* *
@ -75,14 +71,14 @@ public class ShardStrategyFactory {
switch (strat) { switch (strat) {
case LINEAR: case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize, limitByCount); return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, limitByCount);
case READS: case READS:
return new ReadDelimitedReadShardStrategy(startingSize, limitByCount); return new ReadDelimitedReadShardStrategy(startingSize, limitByCount);
case READS_EXPERIMENTAL:
return new BlockDelimitedReadShardStrategy(dataSource);
case INTERVAL: case INTERVAL:
throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option"); throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option");
case LOCUS_EXPERIMENTAL:
throw new UnsupportedOperationException("Cannot do experimental locus sharding without intervals");
case READS_EXPERIMENTAL:
return new BlockDelimitedReadShardStrategy(dataSource);
default: default:
throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request"); throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
} }
@ -115,12 +111,12 @@ public class ShardStrategyFactory {
switch (strat) { switch (strat) {
case LINEAR: case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount); return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case INTERVAL: case INTERVAL:
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.LOCUS_INTERVAL); return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.LOCUS_INTERVAL);
case READS: case READS:
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.READ_INTERVAL); return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.READ_INTERVAL);
case LOCUS_EXPERIMENTAL:
return new IndexDelimitedLocusShardStrategy(dataSource,lst);
case READS_EXPERIMENTAL: case READS_EXPERIMENTAL:
throw new UnsupportedOperationException("Cannot do experimental read sharding with intervals"); throw new UnsupportedOperationException("Cannot do experimental read sharding with intervals");
default: default:

View File

@ -1,19 +1,17 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.*;
import org.broadinstitute.sting.gatk.datasources.shards.BlockDelimitedReadShard;
import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.*;
import net.sf.samtools.SAMFileReader2;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloseableIterator;
import java.util.Collection; import java.util.Collection;
import java.util.List;
import java.io.File; import java.io.File;
/** /**
@ -32,26 +30,46 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
public BlockDrivenSAMDataSource(Reads reads) { public BlockDrivenSAMDataSource(Reads reads) {
super(reads); super(reads);
logger.warn("Experimental sharding is enabled. Many use cases are not supported. Please use with care.");
if(reads.getReadsFiles().size() > 1) if(reads.getReadsFiles().size() > 1)
throw new StingException("Experimental sharding strategy cannot handle multiple BAM files at this point."); throw new StingException("Experimental sharding strategy cannot handle multiple BAM files at this point.");
File readsFile = reads.getReadsFiles().get(0); File readsFile = reads.getReadsFiles().get(0);
reader = new SAMFileReader2(readsFile); reader = new SAMFileReader2(readsFile);
reader.setValidationStringency(reads.getValidationStringency());
} }
public boolean hasIndex() { public boolean hasIndex() {
return reader.hasIndex(); return reader.hasIndex();
} }
/**
 * Asks the underlying reader for the chunks of the BAM file that overlap the given location.
 * @param location the genomic interval of interest.  Its start and stop are narrowed to int
 *                 before being handed to the reader.
 * @return whatever list of chunks the reader reports for that contig / start / stop.
 */
public List<Chunk> getOverlappingFilePointers(GenomeLoc location) {
    final String contig = location.getContig();
    final int start = (int)location.getStart();
    final int stop = (int)location.getStop();
    return reader.getOverlappingFilePointers(contig,start,stop);
}
public StingSAMIterator seek(Shard shard) { public StingSAMIterator seek(Shard shard) {
if(!(shard instanceof BlockDelimitedReadShard)) if(!(shard instanceof BlockDelimitedReadShard) && !(shard instanceof IndexDelimitedLocusShard))
throw new StingException("Currently unable to operate on types other than block delimited read shards."); throw new StingException("BlockDrivenSAMDataSource cannot operate on shards of type: " + shard);
CloseableIterator<SAMRecord> iterator = reader.iterator(((BlockDelimitedReadShard)shard).getChunks());
return applyDecoratingIterators(true, if(shard instanceof ReadShard) {
StingSAMIteratorAdapter.adapt(reads, iterator), CloseableIterator<SAMRecord> iterator = reader.iterator(((BlockDelimitedReadShard)shard).getChunks());
reads.getDownsamplingFraction(), return applyDecoratingIterators(true,
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), StingSAMIteratorAdapter.adapt(reads, iterator),
reads.getSupplementalFilters()); reads.getDownsamplingFraction(),
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
}
else if(shard instanceof IndexDelimitedLocusShard) {
CloseableIterator<SAMRecord> iterator = reader.iterator(((IndexDelimitedLocusShard)shard).getChunks());
return applyDecoratingIterators(false,
StingSAMIteratorAdapter.adapt(reads, iterator),
reads.getDownsamplingFraction(),
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
reads.getSupplementalFilters());
}
throw new UnsupportedOperationException("Unable to infer type of this shard.");
} }
/** /**

View File

@ -4,12 +4,7 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.picard.sam.SamFileHeaderMerger;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
import org.broadinstitute.sting.gatk.datasources.shards.ReadDelimitedReadShard; import org.broadinstitute.sting.gatk.datasources.shards.ReadDelimitedReadShard;
@ -21,8 +16,8 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -178,9 +173,12 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
reads.getSupplementalFilters()); reads.getSupplementalFilters());
// add the new overlapping detection iterator, if we have a last interval and we're a read based shard // add the new overlapping detection iterator, if we have a last interval and we're a read based shard
if(shard.getGenomeLocs().size() > 1)
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
if (mLastInterval != null && shard.getShardType() == Shard.ShardType.READ_INTERVAL ) if (mLastInterval != null && shard.getShardType() == Shard.ShardType.READ_INTERVAL )
iterator = new PlusOneFixIterator(shard.getGenomeLoc(),new IntervalOverlapIterator(iterator,mLastInterval,false)); iterator = new PlusOneFixIterator(shardGenomeLoc,new IntervalOverlapIterator(iterator,mLastInterval,false));
mLastInterval = shard.getGenomeLoc(); mLastInterval = shardGenomeLoc;
} else { } else {
throw new StingException("seek: Unknown shard type"); throw new StingException("seek: Unknown shard type");
@ -205,7 +203,12 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
if( getHeader().getSequenceDictionary().getSequences().size() == 0 ) if( getHeader().getSequenceDictionary().getSequences().size() == 0 )
throw new StingException("Unable to seek to the given locus; reads data source has no alignment information."); throw new StingException("Unable to seek to the given locus; reads data source has no alignment information.");
return createIterator( new MappedStreamSegment(shard.getGenomeLoc()) );
if(shard.getGenomeLocs().size() > 1)
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
return createIterator( new MappedStreamSegment(Collections.singletonList(shardGenomeLoc)) );
} }
/** /**
@ -231,11 +234,11 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
if (!intoUnmappedReads) { if (!intoUnmappedReads) {
if (lastReadPos == null) { if (lastReadPos == null) {
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
iter = createIterator(new MappedStreamSegment(lastReadPos)); iter = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
return InitialReadIterator(readShard.getSize(), iter); return InitialReadIterator(readShard.getSize(), iter);
} else { } else {
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos)))); iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)))));
} }
if (intoUnmappedReads && !includeUnmappedReads) if (intoUnmappedReads && !includeUnmappedReads)
@ -335,7 +338,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
readsTaken = readCount; readsTaken = readCount;
readsSeenAtLastPos = 0; readsSeenAtLastPos = 0;
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos)); CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount); return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
} }
} }

View File

@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
* @return Iterator through the data. * @return Iterator through the data.
*/ */
public Iterator seek( Shard shard ) { public Iterator seek( Shard shard ) {
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) ); SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLocs()) );
return iterator; return iterator;
} }

View File

@ -170,8 +170,10 @@ class EntireStream implements DataStreamSegment {
*/ */
class MappedStreamSegment implements DataStreamSegment { class MappedStreamSegment implements DataStreamSegment {
public final GenomeLoc locus; public final GenomeLoc locus;
public MappedStreamSegment( GenomeLoc locus ) { public MappedStreamSegment( List<GenomeLoc> loci ) {
this.locus = locus; if(loci.size() > 1)
throw new StingException("MappedStreamSegments cannot apply to a range of loci");
this.locus = !loci.isEmpty() ? loci.get(0) : null;
} }
} }

View File

@ -1,18 +1,14 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources; package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.filter.FilteringIterator; import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File; import java.io.File;

View File

@ -134,7 +134,10 @@ public abstract class Accumulator {
* Create a holder for interval results if none exists. Add the result to the holder. * Create a holder for interval results if none exists. Add the result to the holder.
*/ */
public void accumulate( Shard shard, Object result ) { public void accumulate( Shard shard, Object result ) {
intervalAccumulator.add( new Pair<GenomeLoc,Object>( shard.getGenomeLoc(), result ) ); // TODO: The following code is actually wrong; we'll be doubly assigning results to locations.
// Fix before the new sharding system comes online.
for(GenomeLoc genomeLoc: shard.getGenomeLocs())
intervalAccumulator.add( new Pair<GenomeLoc,Object>( genomeLoc, result ) );
} }
/** /**

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
/** /**
* User: hanna * User: hanna
* Date: May 12, 2009 * Date: May 12, 2009
@ -23,24 +24,28 @@ import java.util.Iterator;
*/ */
public class GenomeLocusIterator implements Iterator<GenomeLoc> { public class GenomeLocusIterator implements Iterator<GenomeLoc> {
/** /**
* The entire region over which we're iterating. * An iterator to the entire data structure over which we're iterating.
*/ */
private GenomeLoc completeLocus; private final Iterator<GenomeLoc> locusIterator;
/** /**
* The current position in the traversal. * The multi-base pair long locus referring to the current locus.
*/ */
private GenomeLoc currentLocus; private GenomeLoc currentLocus = null;
/**
* The 1 base pair long location.
*/
private GenomeLoc currentLocation = null;
/** /**
* Creates an iterator that can traverse over the entire * Creates an iterator that can traverse over the entire
* reference specified in the given ShardDataProvider. * reference specified in the given ShardDataProvider.
* @param completeLocus Data provider to use as a backing source. * @param loci the list of loci over which to iterate.
* Provider must have a reference (hasReference() == true).
*/ */
public GenomeLocusIterator( GenomeLoc completeLocus ) { public GenomeLocusIterator( List<GenomeLoc> loci ) {
this.completeLocus = completeLocus; this.locusIterator = loci.iterator();
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart()); seedNextLocus();
} }
/** /**
@ -48,7 +53,7 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
* @return True if the iterator has more elements. False otherwise. * @return True if the iterator has more elements. False otherwise.
*/ */
public boolean hasNext() { public boolean hasNext() {
return !currentLocus.isPast(completeLocus); return currentLocation != null;
} }
/** /**
@ -58,12 +63,29 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
public GenomeLoc next() { public GenomeLoc next() {
if( !hasNext() ) if( !hasNext() )
throw new NoSuchElementException("No elements remaining in bounded reference region."); throw new NoSuchElementException("No elements remaining in bounded reference region.");
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone(); GenomeLoc toReturn = currentLocation.clone();
currentLocus = GenomeLocParser.incPos(currentLocus); seedNextLocus();
return toReturn; return toReturn;
} }
public void remove() { public void remove() {
throw new UnsupportedOperationException( "ReferenceLocusIterator is read-only" ); throw new UnsupportedOperationException( "ReferenceLocusIterator is read-only" );
} }
/**
 * Moves currentLocation forward by one position.  When no position has been established yet, or
 * the step carries it past the current locus, restarts at the first position of the next locus
 * supplied by locusIterator.  Both currentLocus and currentLocation are left null once
 * locusIterator has nothing further to offer.
 */
private void seedNextLocus() {
    final boolean traversalUnderway = currentLocus != null && currentLocation != null;
    if(traversalUnderway)
        currentLocation = GenomeLocParser.incPos(currentLocation);

    // Nothing more to do while a position exists and has not moved past the current locus.
    if(currentLocus != null && currentLocation != null && !currentLocation.isPast(currentLocus))
        return;

    // Either this is the first call or the position ran off the current locus: clear the state,
    // then reseed from the start of the next locus, if one remains.
    currentLocus = null;
    currentLocation = null;
    if(!locusIterator.hasNext())
        return;

    currentLocus = locusIterator.next();
    currentLocation = GenomeLocParser.createGenomeLoc(currentLocus.getContig(),currentLocus.getStart());
}
} }

View File

@ -23,8 +23,6 @@ import java.util.ArrayList;
*/ */
public class TraverseLoci extends TraversalEngine { public class TraverseLoci extends TraversalEngine {
final private static String LOCI_STRING = "sites"; final private static String LOCI_STRING = "sites";
//final private static boolean ENABLE_ROD_TRAVERSAL = false;
/** /**
* our log, which we want to capture anything from this class * our log, which we want to capture anything from this class
@ -52,14 +50,11 @@ public class TraverseLoci extends TraversalEngine {
LocusView locusView = getLocusView( walker, dataProvider ); LocusView locusView = getLocusView( walker, dataProvider );
//if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA )
// throw new RuntimeException("Engine currently doesn't support RodWalkers");
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider ); //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
ReferenceOrderedView referenceOrderedDataView = null; ReferenceOrderedView referenceOrderedDataView = null;
if ( /* ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers || */ WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider ); referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
else else
referenceOrderedDataView = (RodLocusView)locusView; referenceOrderedDataView = (RodLocusView)locusView;
@ -110,7 +105,7 @@ public class TraverseLoci extends TraversalEngine {
// We have a final map call to execute here to clean up the skipped based from the // We have a final map call to execute here to clean up the skipped based from the
// last position in the ROD to that in the interval // last position in the ROD to that in the interval
if ( /* GenomeAnalysisEngine.instance.getArguments().enableRodWalkers && */ WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) { if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
RodLocusView rodLocusView = (RodLocusView)locusView; RodLocusView rodLocusView = (RodLocusView)locusView;
long nSkipped = rodLocusView.getLastSkippedBases(); long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) { if ( nSkipped > 0 ) {

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -36,7 +37,9 @@ public class TraverseLocusWindows extends TraversalEngine {
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker; LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
GenomeLoc interval = shard.getGenomeLoc(); if(shard.getGenomeLocs().size() > 1)
throw new StingException("This traversal does not support multiple intervals within a single shard");
GenomeLoc interval = shard.getGenomeLocs().get(0);
ReadView readView = new ReadView( dataProvider ); ReadView readView = new ReadView( dataProvider );
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );

View File

@ -75,8 +75,12 @@ public class ResourceBundleExtractorDoclet {
resourceText.store(out,"Strings displayed by the Sting help system"); resourceText.store(out,"Strings displayed by the Sting help system");
// ANSI escape sequences: one switches on blinking text, the other resets text attributes
final String blink = "\u001B\u005B\u0035\u006D";
final String reset = "\u001B\u005B\u006D";
if(undocumentedWalkers.size() > 0) if(undocumentedWalkers.size() > 0)
Utils.warnUser("The following walkers are currently undocumented: " + Utils.join(" ",undocumentedWalkers)); Utils.warnUser(String.format("The following walkers are currently undocumented: %s%s%s", blink, Utils.join(" ",undocumentedWalkers), reset));
return true; return true;
} }

View File

@ -33,13 +33,18 @@ public class AllLocusViewTest extends LocusViewTemplate {
/** /**
* Test the reads according to an independently derived context. * Test the reads according to an independently derived context.
* @param view * @param view
* @param bounds * @param range
* @param reads * @param reads
*/ */
@Override @Override
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) { protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
AllLocusView allLocusView = (AllLocusView)view; AllLocusView allLocusView = (AllLocusView)view;
// TODO: Should skip over loci not in the given range.
GenomeLoc firstLoc = range.get(0);
GenomeLoc lastLoc = range.get(range.size()-1);
GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop());
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
AlignmentContext locusContext = allLocusView.next(); AlignmentContext locusContext = allLocusView.next();

View File

@ -36,13 +36,18 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
/** /**
* Test the reads according to an independently derived context. * Test the reads according to an independently derived context.
* @param view * @param view
* @param bounds * @param range
* @param reads * @param reads
*/ */
@Override @Override
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) { protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
CoveredLocusView coveredLocusView = (CoveredLocusView)view; CoveredLocusView coveredLocusView = (CoveredLocusView)view;
// TODO: Should skip over loci not in the given range.
GenomeLoc firstLoc = range.get(0);
GenomeLoc lastLoc = range.get(range.size()-1);
GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop());
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);

View File

@ -87,7 +87,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
*/ */
protected void validateLocation( GenomeLoc loc ) { protected void validateLocation( GenomeLoc loc ) {
Shard shard = new LocusShard(loc); Shard shard = new LocusShard(loc);
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc()); GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLocs());
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null); ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
LocusReferenceView view = new LocusReferenceView(dataProvider); LocusReferenceView view = new LocusReferenceView(dataProvider);
@ -99,7 +99,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0); char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
char actual = view.getReferenceContext(locus).getBase(); char actual = view.getReferenceContext(locus).getBase();
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()), Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLocs()),
expected, expected,
actual); actual);
} }

View File

@ -50,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList()); testReadsInContext(view, shard.getGenomeLocs(), Collections.<SAMRecord>emptyList());
} }
@Test @Test
@ -64,7 +64,7 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read)); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
} }
@Test @Test
@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read)); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
} }
@Test @Test
@ -88,7 +88,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read)); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
} }
@Test @Test
@ -100,7 +100,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read)); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
} }
@Test @Test
@ -112,7 +112,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read)); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
} }
@Test @Test
@ -124,7 +124,7 @@ public abstract class LocusViewTemplate extends BaseTest {
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator); ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
LocusView view = createView(dataProvider); LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read)); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
} }
@Test @Test
@ -139,7 +139,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>(); List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2); Collections.addAll(expectedReads, read1, read2);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads); testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
} }
@Test @Test
@ -156,7 +156,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>(); List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4); Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads); testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
} }
@Test @Test
@ -173,7 +173,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>(); List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4); Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads); testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
} }
@Test @Test
@ -192,7 +192,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>(); List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6); Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads); testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
} }
@Test @Test
@ -219,7 +219,7 @@ public abstract class LocusViewTemplate extends BaseTest {
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>(); List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06, Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06,
read07, read08, read09, read10, read11, read12); read07, read08, read09, read10, read11, read12);
testReadsInContext(view, shard.getGenomeLoc(), expectedReads); testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
} }
/** /**
@ -236,7 +236,7 @@ public abstract class LocusViewTemplate extends BaseTest {
* @param bounds * @param bounds
* @param reads * @param reads
*/ */
protected abstract void testReadsInContext(LocusView view, GenomeLoc bounds, List<SAMRecord> reads); protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<SAMRecord> reads);
/** /**
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data. * Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
@ -256,7 +256,7 @@ public abstract class LocusViewTemplate extends BaseTest {
} }
public void reset() { public void reset() {
return; // TODO MATT FIX ME return;
} }
}; };
} }

View File

@ -112,8 +112,9 @@ public class IntervalShardStrategyTest extends BaseTest {
int counter = 0; int counter = 0;
while (strat.hasNext()) { while (strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
assertEquals(1, d.getGenomeLoc().getStart()); assertEquals(1,d.getGenomeLocs().size());
assertEquals(1000, d.getGenomeLoc().getStop()); assertEquals(1, d.getGenomeLocs().get(0).getStart());
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
counter++; counter++;
} }
assertEquals(5, counter); assertEquals(5, counter);
@ -130,7 +131,8 @@ public class IntervalShardStrategyTest extends BaseTest {
int counter = 0; int counter = 0;
while (strat.hasNext()) { while (strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
assertEquals(1000, d.getGenomeLoc().getStop()); assertEquals(1,d.getGenomeLocs().size());
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
counter++; counter++;
} }
assertEquals(5, counter); assertEquals(5, counter);

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
@ -60,14 +61,16 @@ public class IntervalShardTest extends BaseTest {
public void simpleReturn() { public void simpleReturn() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100); GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL); intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
assertTrue(intervalShard.getGenomeLoc().equals(loc)); assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
assertTrue(intervalShard.getGenomeLocs().get(0).equals(loc));
} }
@Test @Test
public void ensureNotReference() { public void ensureNotReference() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100); GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL); intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc)); assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
assertTrue(intervalShard.getGenomeLocs().get(0) != loc && intervalShard.getGenomeLocs().get(0).equals(loc));
} }
} }

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
@ -62,7 +63,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) { while(strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
assertTrue(d instanceof LocusShard); assertTrue(d instanceof LocusShard);
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 499); assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 499);
++counter; ++counter;
} }
assertTrue(counter == 10); assertTrue(counter == 10);
@ -76,7 +78,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) { while(strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
assertTrue(d instanceof LocusShard); assertTrue(d instanceof LocusShard);
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 999); assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 999);
++counter; ++counter;
} }
assertTrue(counter == 5); assertTrue(counter == 5);
@ -90,10 +93,11 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) { while(strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
assertTrue(d instanceof LocusShard); assertTrue(d instanceof LocusShard);
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
if (counter % 2 == 0) { if (counter % 2 == 0) {
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 599); assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 599);
} else { } else {
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 399); assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 399);
} }
++counter; ++counter;
} }
@ -108,7 +112,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
while(strat.hasNext()) { while(strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
assertTrue(d instanceof LocusShard); assertTrue(d instanceof LocusShard);
assertTrue((d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart()) == 199); assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
assertTrue((d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart()) == 199);
++counter; ++counter;
} }
assertTrue(counter == 1); assertTrue(counter == 1);

View File

@ -67,20 +67,6 @@ public class ShardStrategyFactoryTest extends BaseTest {
assertTrue(st instanceof LinearLocusShardStrategy); assertTrue(st instanceof LinearLocusShardStrategy);
} }
@Test
public void testExpNonInterval() {
ShardStrategy st = ShardStrategyFactory.shatter(null,ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100);
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
}
@Test
public void testExpInterval() {
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(null,ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100,set);
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
}
@Test @Test
public void testLinearInterval() { public void testLinearInterval() {
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100); GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.*;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.Collections;
/** /**
* User: hanna * User: hanna
* Date: May 21, 2009 * Date: May 21, 2009
@ -54,7 +55,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testCreateSingleIterator() { public void testCreateSingleIterator() {
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod); ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -75,10 +76,10 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testCreateMultipleIterators() { public void testCreateMultipleIterators() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
// Create a new iterator at position 2. // Create a new iterator at position 2.
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) ); SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite2)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -125,7 +126,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testIteratorConservation() { public void testIteratorConservation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -139,7 +140,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
iteratorPool.release(iterator); iteratorPool.release(iterator);
// Create another iterator after the current iterator. // Create another iterator after the current iterator.
iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
// Make sure that the previously acquired iterator was reused. // Make sure that the previously acquired iterator was reused.
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
@ -160,7 +161,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
@Test @Test
public void testIteratorCreation() { public void testIteratorCreation() {
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod); ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators()); Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
@ -174,7 +175,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
iteratorPool.release(iterator); iteratorPool.release(iterator);
// Create another iterator after the current iterator. // Create another iterator after the current iterator.
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) ); iterator = iteratorPool.iterator(new MappedStreamSegment(Collections.singletonList(testSite1)) );
// Make sure that the previously acquired iterator was reused. // Make sure that the previously acquired iterator was reused.
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators()); Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -96,7 +97,8 @@ public class SAMBAMDataSourceTest extends BaseTest {
int readCount = 0; int readCount = 0;
count++; count++;
logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig()); GenomeLoc firstLocus = sh.getGenomeLocs().get(0), lastLocus = sh.getGenomeLocs().get(sh.getGenomeLocs().size()-1);
logger.debug("Start : " + firstLocus.getStart() + " stop : " + lastLocus.getStop() + " contig " + firstLocus.getContig());
logger.debug("count = " + count); logger.debug("count = " + count);
StingSAMIterator datum = data.seek(sh); StingSAMIterator datum = data.seek(sh);