Basic support for very simple index-driven locus traversals. Interface has been changed to
support batched intervals in a single shard, but intervals are not yet compressed into a single shard. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2730 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4810e9c9cd
commit
3d922a019f
|
|
@ -184,6 +184,19 @@ class BAMFileReader2
|
|||
return mCurrentIterator;
|
||||
}
|
||||
|
||||
public List<Chunk> getOverlappingFilePointers(final String sequence, final int start, final int end) {
|
||||
long[] filePointers = null;
|
||||
|
||||
final SAMFileHeader fileHeader = getFileHeader();
|
||||
int referenceIndex = fileHeader.getSequenceIndex(sequence);
|
||||
if (referenceIndex != -1) {
|
||||
final BAMFileIndex fileIndex = getFileIndex();
|
||||
filePointers = fileIndex.getSearchBins(referenceIndex, start, end);
|
||||
}
|
||||
|
||||
return Chunk.toChunkList(filePointers);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare to iterate through the SAMRecords that match the given interval.
|
||||
* Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
|
||||
|
|
|
|||
|
|
@ -199,9 +199,16 @@ public class SAMFileReader2 implements Iterable<SAMRecord> {
|
|||
*/
|
||||
public CloseableIterator<SAMRecord> iterator(List<Chunk> chunks) {
|
||||
// TODO: Add sanity checks so that we're not doing this against a BAM file.
|
||||
if(!(mReader instanceof ReaderImplementation2))
|
||||
if(!(mReader instanceof BAMFileReader2))
|
||||
throw new PicardException("This call requires a ReaderImplementation2-compliant interface");
|
||||
return ((ReaderImplementation2)mReader).getIterator(chunks);
|
||||
return ((BAMFileReader2)mReader).getIterator(chunks);
|
||||
}
|
||||
|
||||
public List<Chunk> getOverlappingFilePointers(final String sequence, final int start, final int end) {
|
||||
// TODO: Add sanity checks so that we're not doing this against a BAM file.
|
||||
if(!(mReader instanceof BAMFileReader2))
|
||||
throw new PicardException("This call requires a ReaderImplementation2-compliant interface");
|
||||
return ((BAMFileReader2)mReader).getOverlappingFilePointers(sequence,start,end);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -609,9 +609,8 @@ public class GenomeAnalysisEngine {
|
|||
shardType = (walker.isReduceByInterval()) ?
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
|
||||
|
||||
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
||||
shardType,
|
||||
argCollection.experimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : shardType,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ public class AllLocusView extends LocusView {
|
|||
public AllLocusView(ShardDataProvider provider) {
|
||||
super( provider );
|
||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLoc() );
|
||||
locusIterator = new GenomeLocusIterator( provider.getShard().getGenomeLocs() );
|
||||
if( locusIterator.hasNext() ) {
|
||||
nextPosition = locusIterator.next();
|
||||
nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
import java.util.List;
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -69,7 +71,7 @@ public class LocusReferenceView extends ReferenceView {
|
|||
*/
|
||||
public LocusReferenceView( ShardDataProvider provider ) {
|
||||
super(provider);
|
||||
bounds = provider.getShard().getGenomeLoc();
|
||||
initializeBounds(provider);
|
||||
windowStart = windowStop = 0;
|
||||
initializeReferenceSequence(bounds);
|
||||
}
|
||||
|
|
@ -80,7 +82,7 @@ public class LocusReferenceView extends ReferenceView {
|
|||
*/
|
||||
public LocusReferenceView( Walker walker, ShardDataProvider provider ) {
|
||||
super( provider );
|
||||
bounds = provider.getShard().getGenomeLoc();
|
||||
initializeBounds(provider);
|
||||
|
||||
// Retrieve information about the window being accessed.
|
||||
if( walker.getClass().isAnnotationPresent(Reference.class) ) {
|
||||
|
|
@ -131,6 +133,22 @@ public class LocusReferenceView extends ReferenceView {
|
|||
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
||||
}
|
||||
|
||||
private void initializeBounds(ShardDataProvider provider) {
|
||||
List<GenomeLoc> loci = provider.getShard().getGenomeLocs();
|
||||
|
||||
if(loci.isEmpty()) {
|
||||
bounds = null;
|
||||
return;
|
||||
}
|
||||
|
||||
GenomeLoc firstLocus = loci.get(0);
|
||||
GenomeLoc lastLocus = loci.get(loci.size()-1);
|
||||
if(firstLocus.getContigIndex() != lastLocus.getContigIndex())
|
||||
throw new StingException("LocusReferenceView currently only supports multiple intervals on the same contig.");
|
||||
|
||||
bounds = GenomeLocParser.createGenomeLoc(firstLocus.getContig(),firstLocus.getStart(),lastLocus.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize reference sequence data using the given locus.
|
||||
* @param locus
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
|||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
|
@ -111,7 +112,8 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
* @return True if another locus context is bounded by this shard.
|
||||
*/
|
||||
protected boolean hasNextLocus() {
|
||||
return nextLocus != null && (shard.getGenomeLoc() == null || !nextLocus.getLocation().isPast(shard.getGenomeLoc()));
|
||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
||||
return nextLocus != null && (lastLocus == null || !nextLocus.getLocation().isPast(lastLocus));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -120,7 +122,9 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
* @throws NoSuchElementException if the next element is missing.
|
||||
*/
|
||||
protected AlignmentContext nextLocus() {
|
||||
if( nextLocus == null || (shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc())) )
|
||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
||||
|
||||
if( nextLocus == null || (lastLocus != null && nextLocus.getLocation().isPast(lastLocus)) )
|
||||
throw new NoSuchElementException("No more elements remain in locus context queue.");
|
||||
|
||||
// Cache the current and apply filtering.
|
||||
|
|
@ -131,7 +135,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
nextLocus = loci.next();
|
||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||
if( shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc()) )
|
||||
if( lastLocus != null && nextLocus.getLocation().isPast(lastLocus) )
|
||||
nextLocus = null;
|
||||
}
|
||||
else
|
||||
|
|
@ -152,13 +156,13 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
nextLocus = loci.next();
|
||||
|
||||
// If the location of this shard is available, trim the data stream to match the shard.
|
||||
if(shard.getGenomeLoc() != null) {
|
||||
if(!shard.getGenomeLocs().isEmpty()) {
|
||||
// Iterate past cruft at the beginning to the first locus in the shard.
|
||||
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() )
|
||||
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) && loci.hasNext() )
|
||||
nextLocus = loci.next();
|
||||
|
||||
// If nothing in the shard was found, indicate that by setting nextLocus to null.
|
||||
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) )
|
||||
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) )
|
||||
nextLocus = null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
|
|
@ -38,9 +37,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
GenomeLoc lastLoc = null;
|
||||
RODRecordList<ReferenceOrderedDatum> interval = null;
|
||||
|
||||
// broken support for multi-locus rods
|
||||
//List<ReferenceOrderedDatum> multiLocusRODs = new LinkedList<ReferenceOrderedDatum>();
|
||||
|
||||
/**
|
||||
* The data sources along with their current states.
|
||||
*/
|
||||
|
|
@ -61,18 +57,17 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
public RodLocusView( ShardDataProvider provider ) {
|
||||
super(provider);
|
||||
|
||||
GenomeLoc loc = provider.getShard().getGenomeLoc();
|
||||
GenomeLoc firstLoc = provider.getShard().getGenomeLocs().get(0);
|
||||
|
||||
List< Iterator<RODRecordList<ReferenceOrderedDatum>> > iterators = new LinkedList< Iterator<RODRecordList<ReferenceOrderedDatum>> >();
|
||||
for( ReferenceOrderedDataSource dataSource: provider.getReferenceOrderedData() ) {
|
||||
if ( DEBUG ) System.out.printf("Shard is %s%n", loc);
|
||||
if ( DEBUG ) System.out.printf("Shard is %s%n", provider.getShard().getGenomeLocs());
|
||||
|
||||
// grab the ROD iterator from the data source, and compute the first location in this shard, forwarding
|
||||
// the iterator to immediately before it, so that it can be added to the merging iterator primed for
|
||||
// next() to return the first real ROD in this shard
|
||||
SeekableRODIterator it = (SeekableRODIterator)dataSource.seek(provider.getShard());
|
||||
GenomeLoc shardLoc = provider.getShard().getGenomeLoc();
|
||||
it.seekForward(GenomeLocParser.createGenomeLoc(shardLoc.getContigIndex(), shardLoc.getStart()-1, shardLoc.getStart()-1));
|
||||
it.seekForward(GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(), firstLoc.getStart()-1));
|
||||
|
||||
states.add(new ReferenceOrderedDataState(dataSource,it));
|
||||
|
||||
|
|
@ -99,7 +94,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
if ( ! rodQueue.hasNext() )
|
||||
return false;
|
||||
else {
|
||||
return ! rodQueue.peekLocation().isPast(shard.getGenomeLoc());
|
||||
GenomeLoc lastLocus = shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1);
|
||||
return ! rodQueue.peekLocation().isPast(lastLocus);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -148,11 +144,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
return t;
|
||||
}
|
||||
|
||||
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(ReferenceOrderedDatum marker) {
|
||||
RODRecordList<ReferenceOrderedDatum> wrapper = new RODRecordList<ReferenceOrderedDatum>(marker.getName(),Collections.singletonList(marker),marker.getLocation());
|
||||
return rodQueue.allElementsLTE(wrapper);
|
||||
}
|
||||
|
||||
private Collection<RODRecordList<ReferenceOrderedDatum>> getSpanningTracks(RODRecordList<ReferenceOrderedDatum> marker) {
|
||||
return rodQueue.allElementsLTE(marker);
|
||||
}
|
||||
|
|
@ -173,7 +164,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
if ( lastLoc == null ) {
|
||||
// special case -- we're at the start
|
||||
//System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc());
|
||||
skippedBases = currentPos.getStart() - shard.getGenomeLoc().getStart();
|
||||
GenomeLoc firstLoc = shard.getGenomeLocs().get(0);
|
||||
skippedBases = currentPos.getStart() - firstLoc.getStart();
|
||||
} else {
|
||||
//System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc);
|
||||
skippedBases = currentPos.minus(lastLoc) - 1;
|
||||
|
|
@ -181,7 +173,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
|
||||
if ( skippedBases < -1 ) { // minus 1 value is ok
|
||||
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
|
||||
skippedBases, currentPos, lastLoc, shard.getGenomeLoc()));
|
||||
skippedBases, currentPos, lastLoc, shard.getGenomeLocs()));
|
||||
}
|
||||
return Math.max(skippedBases, 0);
|
||||
}
|
||||
|
|
@ -191,9 +183,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
* @return
|
||||
*/
|
||||
public GenomeLoc getLocOneBeyondShard() {
|
||||
return GenomeLocParser.createGenomeLoc( shard.getGenomeLoc().getContigIndex(),
|
||||
shard.getGenomeLoc().getStop()+1,
|
||||
shard.getGenomeLoc().getStop()+1);
|
||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
||||
return GenomeLocParser.createGenomeLoc(lastLocus.getContigIndex(),lastLocus.getStop()+1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,106 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* <p/>
|
||||
* Class LinearShard
|
||||
* <p/>
|
||||
* A exponential strategy
|
||||
*/
|
||||
public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
|
||||
|
||||
// fixed size
|
||||
private long baseSize = 100000;
|
||||
private long currentExp = 0;
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
|
||||
super(dic);
|
||||
this.limitingFactor = limitByCount;
|
||||
this.baseSize = startSize;
|
||||
currentExp = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param strat the shatter to convert from
|
||||
*/
|
||||
ExpGrowthLocusShardStrategy(LocusShardStrategy strat) {
|
||||
super(strat);
|
||||
this.baseSize = strat.nextShardSize();
|
||||
currentExp = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* The constructor, for a genomic list, start size, and a reference dictionary
|
||||
*
|
||||
* @param dic the reference dictionary
|
||||
* @param startSize the starting size of the shard
|
||||
* @param lst locations to iterate from
|
||||
*/
|
||||
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
|
||||
super(dic, lst);
|
||||
this.limitingFactor = limitByCount;
|
||||
this.baseSize = startSize;
|
||||
this.currentExp = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* set the next shards size
|
||||
*
|
||||
* @param size adjust the next size to this
|
||||
*/
|
||||
public void adjustNextShardSize(long size) {
|
||||
baseSize = size;
|
||||
currentExp = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
protected long nextShardSize() {
|
||||
// we grow the exponentially, we just have to make sure we start at zero
|
||||
++currentExp;
|
||||
return (long) Math.floor(Math.pow((double) baseSize, (double) currentExp));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import net.sf.samtools.Chunk;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A shard that's delimited based on the index rather than
|
||||
*/
|
||||
public class IndexDelimitedLocusShard implements Shard {
|
||||
|
||||
/**
|
||||
* a collection of genomic locations to interate over
|
||||
*/
|
||||
private final GenomeLocSortedSet intervals;
|
||||
|
||||
/**
|
||||
* A list of the chunks associated with this shard.
|
||||
*/
|
||||
private final List<Chunk> chunks;
|
||||
|
||||
IndexDelimitedLocusShard(GenomeLocSortedSet intervals, List<Chunk> chunks) {
|
||||
this.intervals = intervals;
|
||||
this.chunks = chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* The locations represented by this shard.
|
||||
* @return the genome location represented by this shard
|
||||
*/
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return intervals.toList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the chunks associated with this locus shard.
|
||||
* @return A list of the chunks to use when retrieving locus data.
|
||||
*/
|
||||
public List<Chunk> getChunks() {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the type of shard, LOCUS_INTERVAL.
|
||||
* @return LOCUS_INTERVAL, indicating the shard type
|
||||
*/
|
||||
public ShardType getShardType() {
|
||||
return ShardType.LOCUS_INTERVAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* String representation of this shard.
|
||||
* @return A string representation of the boundaries of this shard.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return intervals.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.Chunk;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A sharding strategy for loci based on reading of the index.
|
||||
*/
|
||||
public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
||||
|
||||
/** our storage of the genomic locations they'd like to shard over */
|
||||
private final SortedMap<GenomeLoc,List<Chunk>> locations = new TreeMap<GenomeLoc,List<Chunk>>();
|
||||
|
||||
/**
|
||||
* construct the shard strategy from a seq dictionary, a shard size, and and genomeLocs
|
||||
* @param dataSource Data source from which to load index data.
|
||||
* @param locations List of locations for which to load data.
|
||||
*/
|
||||
IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
||||
for(GenomeLoc location: locations)
|
||||
this.locations.put(location,((BlockDrivenSAMDataSource)dataSource).getOverlappingFilePointers(location));
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if there are additional shards
|
||||
*
|
||||
* @return false if we're done processing shards
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return ( !locations.isEmpty() );
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the next Shard
|
||||
*
|
||||
* @return the next shard
|
||||
*/
|
||||
public IndexDelimitedLocusShard next() {
|
||||
if (( this.locations == null ) || ( locations.isEmpty() )) {
|
||||
throw new StingException("IntervalShardStrategy: genomic regions list is empty in next() function.");
|
||||
}
|
||||
|
||||
// get the first region in the list
|
||||
GenomeLoc loc = locations.firstKey();
|
||||
List<Chunk> filePointers = locations.get(loc);
|
||||
locations.remove(loc);
|
||||
|
||||
return new IndexDelimitedLocusShard(GenomeLocSortedSet.createSetFromList(Arrays.asList(loc)),filePointers);
|
||||
}
|
||||
|
||||
/** we don't support the remove command */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("ShardStrategies don't support remove()");
|
||||
}
|
||||
|
||||
/**
|
||||
* makes the IntervalShard iterable, i.e. usable in a for loop.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -50,8 +53,8 @@ public class IntervalShard implements Shard {
|
|||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return mSet;
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return Collections.singletonList(mSet);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -40,8 +43,8 @@ public class LocusShard implements Shard {
|
|||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return mLoc;
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return Collections.singletonList(mLoc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A single, monolithic shard bridging all available data.
|
||||
* @author mhanna
|
||||
|
|
@ -28,8 +31,8 @@ public class MonolithicShard implements Shard {
|
|||
* Returns an empty list, indicating that (in this case) the entire genome is covered.
|
||||
* @return an empty list.
|
||||
*/
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return null;
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -24,7 +26,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
*/
|
||||
public abstract class ReadShard implements Shard {
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
throw new UnsupportedOperationException("ReadShard isn't genome loc aware");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -34,7 +35,7 @@ public interface Shard extends Serializable {
|
|||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc();
|
||||
public List<GenomeLoc> getGenomeLocs();
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
|
|
|
|||
|
|
@ -38,17 +38,13 @@ import java.io.File;
|
|||
public class ShardStrategyFactory {
|
||||
public enum SHATTER_STRATEGY {
|
||||
LINEAR,
|
||||
EXPONENTIAL,
|
||||
READS,
|
||||
READS_EXPERIMENTAL,
|
||||
INTERVAL,
|
||||
MONOLITHIC // Put all of the available data into one shard.
|
||||
MONOLITHIC, // Put all of the available data into one shard.
|
||||
LOCUS_EXPERIMENTAL,
|
||||
READS_EXPERIMENTAL
|
||||
}
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
|
||||
|
||||
|
||||
/**
|
||||
* get a new shatter strategy
|
||||
*
|
||||
|
|
@ -75,14 +71,14 @@ public class ShardStrategyFactory {
|
|||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
|
||||
case EXPONENTIAL:
|
||||
return new ExpGrowthLocusShardStrategy(dic, startingSize, limitByCount);
|
||||
case READS:
|
||||
return new ReadDelimitedReadShardStrategy(startingSize, limitByCount);
|
||||
case READS_EXPERIMENTAL:
|
||||
return new BlockDelimitedReadShardStrategy(dataSource);
|
||||
case INTERVAL:
|
||||
throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option");
|
||||
case LOCUS_EXPERIMENTAL:
|
||||
throw new UnsupportedOperationException("Cannot do experimental locus sharding without intervals");
|
||||
case READS_EXPERIMENTAL:
|
||||
return new BlockDelimitedReadShardStrategy(dataSource);
|
||||
default:
|
||||
throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
|
||||
}
|
||||
|
|
@ -115,12 +111,12 @@ public class ShardStrategyFactory {
|
|||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
|
||||
case EXPONENTIAL:
|
||||
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst, limitDataCount);
|
||||
case INTERVAL:
|
||||
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.LOCUS_INTERVAL);
|
||||
case READS:
|
||||
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.READ_INTERVAL);
|
||||
case LOCUS_EXPERIMENTAL:
|
||||
return new IndexDelimitedLocusShardStrategy(dataSource,lst);
|
||||
case READS_EXPERIMENTAL:
|
||||
throw new UnsupportedOperationException("Cannot do experimental read sharding with intervals");
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -1,19 +1,17 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.BlockDelimitedReadShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader2;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
|
|
@ -32,26 +30,46 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
public BlockDrivenSAMDataSource(Reads reads) {
|
||||
super(reads);
|
||||
|
||||
logger.warn("Experimental sharding is enabled. Many use cases are not supported. Please use with care.");
|
||||
|
||||
if(reads.getReadsFiles().size() > 1)
|
||||
throw new StingException("Experimental sharding strategy cannot handle multiple BAM files at this point.");
|
||||
|
||||
File readsFile = reads.getReadsFiles().get(0);
|
||||
reader = new SAMFileReader2(readsFile);
|
||||
reader.setValidationStringency(reads.getValidationStringency());
|
||||
}
|
||||
|
||||
public boolean hasIndex() {
|
||||
return reader.hasIndex();
|
||||
}
|
||||
|
||||
public List<Chunk> getOverlappingFilePointers(GenomeLoc location) {
|
||||
return reader.getOverlappingFilePointers(location.getContig(),(int)location.getStart(),(int)location.getStop());
|
||||
}
|
||||
|
||||
public StingSAMIterator seek(Shard shard) {
|
||||
if(!(shard instanceof BlockDelimitedReadShard))
|
||||
throw new StingException("Currently unable to operate on types other than block delimited read shards.");
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator(((BlockDelimitedReadShard)shard).getChunks());
|
||||
return applyDecoratingIterators(true,
|
||||
StingSAMIteratorAdapter.adapt(reads, iterator),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
if(!(shard instanceof BlockDelimitedReadShard) && !(shard instanceof IndexDelimitedLocusShard))
|
||||
throw new StingException("BlockDrivenSAMDataSource cannot operate on shards of type: " + shard);
|
||||
|
||||
if(shard instanceof ReadShard) {
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator(((BlockDelimitedReadShard)shard).getChunks());
|
||||
return applyDecoratingIterators(true,
|
||||
StingSAMIteratorAdapter.adapt(reads, iterator),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
}
|
||||
else if(shard instanceof IndexDelimitedLocusShard) {
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator(((IndexDelimitedLocusShard)shard).getChunks());
|
||||
return applyDecoratingIterators(false,
|
||||
StingSAMIteratorAdapter.adapt(reads, iterator),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
}
|
||||
|
||||
throw new UnsupportedOperationException("Unable to infer type of this shard.");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -4,12 +4,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ReadDelimitedReadShard;
|
||||
|
|
@ -21,8 +16,8 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -178,9 +173,12 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
reads.getSupplementalFilters());
|
||||
|
||||
// add the new overlapping detection iterator, if we have a last interval and we're a read based shard
|
||||
if(shard.getGenomeLocs().size() > 1)
|
||||
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
|
||||
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
|
||||
if (mLastInterval != null && shard.getShardType() == Shard.ShardType.READ_INTERVAL )
|
||||
iterator = new PlusOneFixIterator(shard.getGenomeLoc(),new IntervalOverlapIterator(iterator,mLastInterval,false));
|
||||
mLastInterval = shard.getGenomeLoc();
|
||||
iterator = new PlusOneFixIterator(shardGenomeLoc,new IntervalOverlapIterator(iterator,mLastInterval,false));
|
||||
mLastInterval = shardGenomeLoc;
|
||||
} else {
|
||||
|
||||
throw new StingException("seek: Unknown shard type");
|
||||
|
|
@ -205,7 +203,12 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
|
||||
if( getHeader().getSequenceDictionary().getSequences().size() == 0 )
|
||||
throw new StingException("Unable to seek to the given locus; reads data source has no alignment information.");
|
||||
return createIterator( new MappedStreamSegment(shard.getGenomeLoc()) );
|
||||
|
||||
if(shard.getGenomeLocs().size() > 1)
|
||||
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
|
||||
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
|
||||
|
||||
return createIterator( new MappedStreamSegment(Collections.singletonList(shardGenomeLoc)) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -231,11 +234,11 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
if (!intoUnmappedReads) {
|
||||
if (lastReadPos == null) {
|
||||
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||
iter = createIterator(new MappedStreamSegment(lastReadPos));
|
||||
iter = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
|
||||
return InitialReadIterator(readShard.getSize(), iter);
|
||||
} else {
|
||||
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
||||
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
|
||||
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)))));
|
||||
}
|
||||
|
||||
if (intoUnmappedReads && !includeUnmappedReads)
|
||||
|
|
@ -335,7 +338,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
readsTaken = readCount;
|
||||
readsSeenAtLastPos = 0;
|
||||
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
||||
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos));
|
||||
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(Collections.singletonList(lastReadPos)));
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
|
|||
* @return Iterator through the data.
|
||||
*/
|
||||
public Iterator seek( Shard shard ) {
|
||||
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLoc()) );
|
||||
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLocs()) );
|
||||
return iterator;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -170,8 +170,10 @@ class EntireStream implements DataStreamSegment {
|
|||
*/
|
||||
class MappedStreamSegment implements DataStreamSegment {
|
||||
public final GenomeLoc locus;
|
||||
public MappedStreamSegment( GenomeLoc locus ) {
|
||||
this.locus = locus;
|
||||
public MappedStreamSegment( List<GenomeLoc> loci ) {
|
||||
if(loci.size() > 1)
|
||||
throw new StingException("MappedStreamSegments cannot apply to a range of loci");
|
||||
this.locus = !loci.isEmpty() ? loci.get(0) : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||
|
||||
import java.io.File;
|
||||
|
|
|
|||
|
|
@ -134,7 +134,10 @@ public abstract class Accumulator {
|
|||
* Create a holder for interval results if none exists. Add the result to the holder.
|
||||
*/
|
||||
public void accumulate( Shard shard, Object result ) {
|
||||
intervalAccumulator.add( new Pair<GenomeLoc,Object>( shard.getGenomeLoc(), result ) );
|
||||
// TODO: The following code is actually wrong; we'll be doubly assigning results to locations.
|
||||
// Fix before the new sharding system comes online.
|
||||
for(GenomeLoc genomeLoc: shard.getGenomeLocs())
|
||||
intervalAccumulator.add( new Pair<GenomeLoc,Object>( genomeLoc, result ) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 12, 2009
|
||||
|
|
@ -23,24 +24,28 @@ import java.util.Iterator;
|
|||
*/
|
||||
public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
||||
/**
|
||||
* The entire region over which we're iterating.
|
||||
* An iterator to the entire data structure over which we're iterating.
|
||||
*/
|
||||
private GenomeLoc completeLocus;
|
||||
private final Iterator<GenomeLoc> locusIterator;
|
||||
|
||||
/**
|
||||
* The current position in the traversal.
|
||||
* The multi-base pair long locus referring to the current locus.
|
||||
*/
|
||||
private GenomeLoc currentLocus;
|
||||
private GenomeLoc currentLocus = null;
|
||||
|
||||
/**
|
||||
* The 1 base pair long location.
|
||||
*/
|
||||
private GenomeLoc currentLocation = null;
|
||||
|
||||
/**
|
||||
* Creates an iterator that can traverse over the entire
|
||||
* reference specified in the given ShardDataProvider.
|
||||
* @param completeLocus Data provider to use as a backing source.
|
||||
* Provider must have a reference (hasReference() == true).
|
||||
* @param loci the list of loci over which to iterate.
|
||||
*/
|
||||
public GenomeLocusIterator( GenomeLoc completeLocus ) {
|
||||
this.completeLocus = completeLocus;
|
||||
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
||||
public GenomeLocusIterator( List<GenomeLoc> loci ) {
|
||||
this.locusIterator = loci.iterator();
|
||||
seedNextLocus();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -48,7 +53,7 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
|||
* @return True if the iterator has more elements. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return !currentLocus.isPast(completeLocus);
|
||||
return currentLocation != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -58,12 +63,29 @@ public class GenomeLocusIterator implements Iterator<GenomeLoc> {
|
|||
public GenomeLoc next() {
|
||||
if( !hasNext() )
|
||||
throw new NoSuchElementException("No elements remaining in bounded reference region.");
|
||||
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
|
||||
currentLocus = GenomeLocParser.incPos(currentLocus);
|
||||
GenomeLoc toReturn = currentLocation.clone();
|
||||
seedNextLocus();
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException( "ReferenceLocusIterator is read-only" );
|
||||
}
|
||||
|
||||
/**
|
||||
* Position currentLocation at the next locus, if possible.
|
||||
*/
|
||||
private void seedNextLocus() {
|
||||
if(currentLocus != null && currentLocation != null)
|
||||
currentLocation = GenomeLocParser.incPos(currentLocation);
|
||||
|
||||
// If initializing or the location was pushed off the current locus, reinitialize using the next locus.
|
||||
if(currentLocus == null || currentLocation == null || currentLocation.isPast(currentLocus)) {
|
||||
currentLocus = currentLocation = null;
|
||||
if(locusIterator.hasNext()){
|
||||
currentLocus = locusIterator.next();
|
||||
currentLocation = GenomeLocParser.createGenomeLoc(currentLocus.getContig(),currentLocus.getStart());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,8 +23,6 @@ import java.util.ArrayList;
|
|||
*/
|
||||
public class TraverseLoci extends TraversalEngine {
|
||||
final private static String LOCI_STRING = "sites";
|
||||
//final private static boolean ENABLE_ROD_TRAVERSAL = false;
|
||||
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
|
|
@ -52,14 +50,11 @@ public class TraverseLoci extends TraversalEngine {
|
|||
|
||||
LocusView locusView = getLocusView( walker, dataProvider );
|
||||
|
||||
//if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA )
|
||||
// throw new RuntimeException("Engine currently doesn't support RodWalkers");
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
|
||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = null;
|
||||
if ( /* ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers || */ WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
|
@ -110,7 +105,7 @@ public class TraverseLoci extends TraversalEngine {
|
|||
|
||||
// We have a final map call to execute here to clean up the skipped bases from the
|
||||
// last position in the ROD to that in the interval
|
||||
if ( /* GenomeAnalysisEngine.instance.getArguments().enableRodWalkers && */ WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
|
||||
RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||
long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -36,7 +37,9 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
|
||||
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
|
||||
|
||||
GenomeLoc interval = shard.getGenomeLoc();
|
||||
if(shard.getGenomeLocs().size() > 1)
|
||||
throw new StingException("This traversal does not support multiple intervals within a single shard");
|
||||
GenomeLoc interval = shard.getGenomeLocs().get(0);
|
||||
|
||||
ReadView readView = new ReadView( dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
|
|
|
|||
|
|
@ -75,8 +75,12 @@ public class ResourceBundleExtractorDoclet {
|
|||
|
||||
resourceText.store(out,"Strings displayed by the Sting help system");
|
||||
|
||||
// ANSI escape sequences for making text blink and for resetting text attributes
|
||||
final String blink = "\u001B\u005B\u0035\u006D";
|
||||
final String reset = "\u001B\u005B\u006D";
|
||||
|
||||
if(undocumentedWalkers.size() > 0)
|
||||
Utils.warnUser("The following walkers are currently undocumented: " + Utils.join(" ",undocumentedWalkers));
|
||||
Utils.warnUser(String.format("The following walkers are currently undocumented: %s%s%s", blink, Utils.join(" ",undocumentedWalkers), reset));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,13 +33,18 @@ public class AllLocusViewTest extends LocusViewTemplate {
|
|||
/**
|
||||
* Test the reads according to an independently derived context.
|
||||
* @param view
|
||||
* @param bounds
|
||||
* @param range
|
||||
* @param reads
|
||||
*/
|
||||
@Override
|
||||
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) {
|
||||
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
|
||||
AllLocusView allLocusView = (AllLocusView)view;
|
||||
|
||||
// TODO: Should skip over loci not in the given range.
|
||||
GenomeLoc firstLoc = range.get(0);
|
||||
GenomeLoc lastLoc = range.get(range.size()-1);
|
||||
GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop());
|
||||
|
||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
|
||||
AlignmentContext locusContext = allLocusView.next();
|
||||
|
|
|
|||
|
|
@ -36,13 +36,18 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
|
|||
/**
|
||||
* Test the reads according to an independently derived context.
|
||||
* @param view
|
||||
* @param bounds
|
||||
* @param range
|
||||
* @param reads
|
||||
*/
|
||||
@Override
|
||||
protected void testReadsInContext( LocusView view, GenomeLoc bounds, List<SAMRecord> reads ) {
|
||||
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
|
||||
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
|
||||
|
||||
// TODO: Should skip over loci not in the given range.
|
||||
GenomeLoc firstLoc = range.get(0);
|
||||
GenomeLoc lastLoc = range.get(range.size()-1);
|
||||
GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop());
|
||||
|
||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
*/
|
||||
protected void validateLocation( GenomeLoc loc ) {
|
||||
Shard shard = new LocusShard(loc);
|
||||
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLoc());
|
||||
GenomeLocusIterator shardIterator = new GenomeLocusIterator(shard.getGenomeLocs());
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
|
@ -99,7 +99,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
|
||||
char actual = view.getReferenceContext(locus).getBase();
|
||||
|
||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()),
|
||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLocs()),
|
||||
expected,
|
||||
actual);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.<SAMRecord>emptyList());
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.<SAMRecord>emptyList());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -64,7 +64,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -76,7 +76,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -88,7 +88,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -100,7 +100,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -112,7 +112,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -124,7 +124,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, iterator);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLoc(), Collections.singletonList(read));
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -139,7 +139,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2);
|
||||
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -156,7 +156,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2, read3, read4);
|
||||
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -173,7 +173,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2, read3, read4);
|
||||
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -192,7 +192,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6);
|
||||
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -219,7 +219,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06,
|
||||
read07, read08, read09, read10, read11, read12);
|
||||
testReadsInContext(view, shard.getGenomeLoc(), expectedReads);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -236,7 +236,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
* @param bounds
|
||||
* @param reads
|
||||
*/
|
||||
protected abstract void testReadsInContext(LocusView view, GenomeLoc bounds, List<SAMRecord> reads);
|
||||
protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<SAMRecord> reads);
|
||||
|
||||
/**
|
||||
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
|
||||
|
|
@ -256,7 +256,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
}
|
||||
|
||||
public void reset() {
|
||||
return; // TODO MATT FIX ME
|
||||
return;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -112,8 +112,9 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
int counter = 0;
|
||||
while (strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertEquals(1, d.getGenomeLoc().getStart());
|
||||
assertEquals(1000, d.getGenomeLoc().getStop());
|
||||
assertEquals(1,d.getGenomeLocs().size());
|
||||
assertEquals(1, d.getGenomeLocs().get(0).getStart());
|
||||
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
|
||||
counter++;
|
||||
}
|
||||
assertEquals(5, counter);
|
||||
|
|
@ -130,7 +131,8 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
int counter = 0;
|
||||
while (strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertEquals(1000, d.getGenomeLoc().getStop());
|
||||
assertEquals(1,d.getGenomeLocs().size());
|
||||
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
|
||||
counter++;
|
||||
}
|
||||
assertEquals(5, counter);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
|
|
@ -60,14 +61,16 @@ public class IntervalShardTest extends BaseTest {
|
|||
public void simpleReturn() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
|
||||
assertTrue(intervalShard.getGenomeLoc().equals(loc));
|
||||
assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
|
||||
assertTrue(intervalShard.getGenomeLocs().get(0).equals(loc));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ensureNotReference() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
|
||||
assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
|
||||
assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
|
||||
assertTrue(intervalShard.getGenomeLocs().get(0) != loc && intervalShard.getGenomeLocs().get(0).equals(loc));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
|
|
@ -62,7 +63,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
|||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 499);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 499);
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 10);
|
||||
|
|
@ -76,7 +78,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
|||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 999);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 999);
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 5);
|
||||
|
|
@ -90,10 +93,11 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
|||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
if (counter % 2 == 0) {
|
||||
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 599);
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 599);
|
||||
} else {
|
||||
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 399);
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 399);
|
||||
}
|
||||
++counter;
|
||||
}
|
||||
|
|
@ -108,7 +112,8 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
|||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertTrue((d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart()) == 199);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
assertTrue((d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart()) == 199);
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 1);
|
||||
|
|
|
|||
|
|
@ -67,20 +67,6 @@ public class ShardStrategyFactoryTest extends BaseTest {
|
|||
assertTrue(st instanceof LinearLocusShardStrategy);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExpNonInterval() {
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(null,ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100);
|
||||
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExpInterval() {
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
set.add(l);
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(null,ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100,set);
|
||||
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLinearInterval() {
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.*;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Collections;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 21, 2009
|
||||
|
|
@ -54,7 +55,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
@Test
|
||||
public void testCreateSingleIterator() {
|
||||
ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
SeekableRODIterator iterator = (SeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -75,10 +76,10 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
@Test
|
||||
public void testCreateMultipleIterators() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
SeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
|
||||
|
||||
// Create a new iterator at position 2.
|
||||
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(testSite2) );
|
||||
SeekableRODIterator iterator2 = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite2)) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -125,7 +126,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
@Test
|
||||
public void testIteratorConservation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) );
|
||||
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite1)) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -139,7 +140,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
iteratorPool.release(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
||||
iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
|
||||
|
||||
// Make sure that the previously acquired iterator was reused.
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
|
|
@ -160,7 +161,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
@Test
|
||||
public void testIteratorCreation() {
|
||||
ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod);
|
||||
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) );
|
||||
SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(Collections.singletonList(testSite3)) );
|
||||
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
|
||||
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
|
||||
|
|
@ -174,7 +175,7 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
iteratorPool.release(iterator);
|
||||
|
||||
// Create another iterator after the current iterator.
|
||||
iterator = iteratorPool.iterator(new MappedStreamSegment(testSite1) );
|
||||
iterator = iteratorPool.iterator(new MappedStreamSegment(Collections.singletonList(testSite1)) );
|
||||
|
||||
// Make sure that the previously acquired iterator was not reused and a new iterator was created instead.
|
||||
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
|||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
|
@ -96,7 +97,8 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
int readCount = 0;
|
||||
count++;
|
||||
|
||||
logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig());
|
||||
GenomeLoc firstLocus = sh.getGenomeLocs().get(0), lastLocus = sh.getGenomeLocs().get(sh.getGenomeLocs().size()-1);
|
||||
logger.debug("Start : " + firstLocus.getStart() + " stop : " + lastLocus.getStop() + " contig " + firstLocus.getContig());
|
||||
logger.debug("count = " + count);
|
||||
StingSAMIterator datum = data.seek(sh);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue