First pass at eliminating the old sharding system. Classes required for the original sharding system
are gone where I could identify them, but hierarchies that split to support two sharding systems have not yet been taken apart. @Eric: ~4k lines. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3580 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b694ca9633
commit
612c3fdd9d
|
|
@ -726,7 +726,7 @@ public class GenomeAnalysisEngine {
|
|||
ValidationExclusion exclusions) {
|
||||
// Use monolithic sharding if no index is present. Monolithic sharding is always required for the original
|
||||
// sharding system; it's required with the new sharding system only for locus walkers.
|
||||
if(readsDataSource != null && !readsDataSource.hasIndex() && (argCollection.disableExperimentalSharding || walker instanceof LocusWalker)) {
|
||||
if(readsDataSource != null && !readsDataSource.hasIndex() && walker instanceof LocusWalker) {
|
||||
if(!exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM) || intervals != null)
|
||||
throw new StingException("The GATK cannot currently process unindexed BAM files");
|
||||
|
||||
|
|
@ -756,27 +756,21 @@ public class GenomeAnalysisEngine {
|
|||
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
|
||||
Utils.scareUser("Locus walkers can only walk over coordinate-sorted data. Please resort your input BAM file.");
|
||||
|
||||
shardType = (walker.isReduceByInterval()) ?
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
|
||||
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
||||
referenceDataSource.getReference(),
|
||||
!argCollection.disableExperimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : shardType,
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE,
|
||||
intervals, maxIterations);
|
||||
} else
|
||||
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
||||
referenceDataSource.getReference(),
|
||||
!argCollection.disableExperimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
||||
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
|
||||
drivingDataSource.getSequenceDictionary(),
|
||||
SHARD_SIZE, maxIterations);
|
||||
} else if (walker instanceof ReadWalker ||
|
||||
walker instanceof DuplicateWalker) {
|
||||
if(!argCollection.disableExperimentalSharding)
|
||||
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL;
|
||||
else
|
||||
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
|
||||
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL;
|
||||
|
||||
if (intervals != null && !intervals.isEmpty()) {
|
||||
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
|
||||
|
|
@ -793,8 +787,6 @@ public class GenomeAnalysisEngine {
|
|||
SHARD_SIZE, maxIterations);
|
||||
}
|
||||
} else if (walker instanceof ReadPairWalker) {
|
||||
if(argCollection.disableExperimentalSharding)
|
||||
Utils.scareUser("Pairs traversal cannot be used in conjunction with the old sharding system.");
|
||||
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
|
||||
Utils.scareUser("Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file.");
|
||||
if(intervals != null && !intervals.isEmpty())
|
||||
|
|
@ -822,13 +814,7 @@ public class GenomeAnalysisEngine {
|
|||
if (reads.getReadsFiles().size() == 0)
|
||||
return null;
|
||||
|
||||
SAMDataSource dataSource = null;
|
||||
if(!argCollection.disableExperimentalSharding)
|
||||
dataSource = new BlockDrivenSAMDataSource(reads);
|
||||
else
|
||||
dataSource = new IndexDrivenSAMDataSource(reads);
|
||||
|
||||
return dataSource;
|
||||
return new BlockDrivenSAMDataSource(reads);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -168,10 +168,6 @@ public class GATKArgumentCollection {
|
|||
// @Argument(fullName = "enableRodWalkers", shortName = "erw", doc = "Enable experimental rodWalker support. TEMPORARY HACK TO ALLOW EXPERIMENTATION WITH ROD WALKERS. [default is false]}.", required = false)
|
||||
// public boolean enableRodWalkers = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "disable_experimental_sharding",shortName="ds", doc="Disable the experimental sharding strategy.", required = false)
|
||||
public boolean disableExperimentalSharding = false;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||
public List<String> readGroupBlackList = null;
|
||||
|
|
|
|||
|
|
@ -46,24 +46,14 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
|
|||
*/
|
||||
private final Map<SAMReaderID,SAMFileSpan> fileSpans;
|
||||
|
||||
/**
|
||||
* An IndexDelimitedLocusShard can be used either for LOCUS or LOCUS_INTERVAL shard types.
|
||||
* Track which type is being used.
|
||||
*/
|
||||
private final ShardType shardType;
|
||||
|
||||
/**
|
||||
* Create a new locus shard, divided by index.
|
||||
* @param intervals List of intervals to process.
|
||||
* @param fileSpans File spans associated with that interval.
|
||||
* @param shardType Type of the shard; must be either LOCUS or LOCUS_INTERVAL.
|
||||
*/
|
||||
IndexDelimitedLocusShard(List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans, ShardType shardType) {
|
||||
IndexDelimitedLocusShard(List<GenomeLoc> intervals, Map<SAMReaderID,SAMFileSpan> fileSpans) {
|
||||
super(intervals);
|
||||
this.fileSpans = fileSpans;
|
||||
if(shardType != ShardType.LOCUS && shardType != ShardType.LOCUS_INTERVAL)
|
||||
throw new StingException("Attempted to create an IndexDelimitedLocusShard with invalid shard type: " + shardType);
|
||||
this.shardType = shardType;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -125,6 +115,6 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
|
|||
*/
|
||||
@Override
|
||||
public ShardType getShardType() {
|
||||
return shardType;
|
||||
return ShardType.LOCUS;
|
||||
}
|
||||
}
|
||||
|
|
@ -127,7 +127,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
|||
public IndexDelimitedLocusShard next() {
|
||||
FilePointer nextFilePointer = filePointerIterator.next();
|
||||
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
|
||||
return new IndexDelimitedLocusShard(nextFilePointer.locations,fileSpansBounding,Shard.ShardType.LOCUS_INTERVAL);
|
||||
return new IndexDelimitedLocusShard(nextFilePointer.locations,fileSpansBounding);
|
||||
}
|
||||
|
||||
/** we don't support the remove command */
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class IntervalShard
|
||||
* <p/>
|
||||
* the base interval shard. All interval shards are generally the same,
|
||||
* but must return their ShardType individually.
|
||||
*/
|
||||
public class IntervalShard implements Shard {
|
||||
|
||||
/** a collection of genomic locations to interate over */
|
||||
private GenomeLoc mSet;
|
||||
private Shard.ShardType mType = Shard.ShardType.LOCUS_INTERVAL;
|
||||
|
||||
IntervalShard(GenomeLoc myLocation, Shard.ShardType intervalType) {
|
||||
if (intervalType != Shard.ShardType.LOCUS_INTERVAL && intervalType != Shard.ShardType.READ_INTERVAL)
|
||||
throw new IllegalArgumentException("The specified interval type must be either LOCUS_INTERVAL or READ_INTERVAL");
|
||||
mType = intervalType;
|
||||
mSet = myLocation.clone();
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public List<GenomeLoc> getGenomeLocs() {
|
||||
return Collections.singletonList(mSet);
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the type of shard, READ
|
||||
*
|
||||
* @return READ, indicating the shard type
|
||||
*/
|
||||
public Shard.ShardType getShardType() {
|
||||
return mType;
|
||||
}
|
||||
|
||||
/**
|
||||
* String representation of this shard.
|
||||
* @return A string representation of the boundaries of this shard.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return mSet.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadByIntervalShardStrategy
|
||||
* <p/>
|
||||
* Impliments the sharding strategy for reads, given a list
|
||||
* of genomic locations. Shards returned will be bounded by the interval,
|
||||
* but each provided interval may be split into a number of smaller regions.
|
||||
*/
|
||||
public class IntervalShardStrategy implements ShardStrategy {
|
||||
|
||||
/** our storage of the genomic locations they'd like to shard over */
|
||||
private final GenomeLocSortedSet regions;
|
||||
|
||||
/** their prefered size of the shard, we can modify this based on what we see in the shards */
|
||||
private long size;
|
||||
private Shard.ShardType type;
|
||||
/**
|
||||
* change the recommended shard size for the next shard we generate. The code will do it's
|
||||
* best to respect this value, but there are no guarantees.
|
||||
*
|
||||
* @param size the next recommended shard size.
|
||||
*/
|
||||
public void adjustNextShardSize( long size ) {
|
||||
this.size = size; // we don't do anything with the size in this case, but we should store it anyway
|
||||
}
|
||||
|
||||
/**
|
||||
* construct the shard strategy from a seq dictionary, a shard size, and and genomeLocs
|
||||
*
|
||||
* @param size
|
||||
* @param locations
|
||||
*/
|
||||
IntervalShardStrategy( long size, GenomeLocSortedSet locations, Shard.ShardType shardType ) {
|
||||
type = shardType;
|
||||
this.regions = locations == null ? new GenomeLocSortedSet() : locations.clone();
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if there are additional shards
|
||||
*
|
||||
* @return false if we're done processing shards
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return ( !regions.isEmpty() );
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the next Shard
|
||||
*
|
||||
* @return the next shard
|
||||
*/
|
||||
public Shard next() {
|
||||
if (( this.regions == null ) || ( regions.isEmpty() )) {
|
||||
throw new StingException("IntervalShardStrategy: genomic regions list is empty in next() function.");
|
||||
}
|
||||
|
||||
// get the first region in the list
|
||||
GenomeLoc loc = regions.iterator().next();
|
||||
|
||||
regions.remove(loc);
|
||||
return new IntervalShard(loc,type);
|
||||
|
||||
}
|
||||
|
||||
/** we don't support the remove command */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("ShardStrategies don't support remove()");
|
||||
}
|
||||
|
||||
/**
|
||||
* makes the ReadIntervalShard iterable, i.e. usable in a for loop.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,99 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* <p/>
|
||||
* Class AdaptiveShard
|
||||
* <p/>
|
||||
* allows you to change the sharding length as you traverse
|
||||
*/
|
||||
class LinearLocusShardStrategy extends LocusShardStrategy {
|
||||
|
||||
// default the next size to 100,000
|
||||
private long nextShardSize = 100000;
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
|
||||
super(dic);
|
||||
this.limitingFactor = limitByCount;
|
||||
this.nextShardSize = startSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* the constructor, constructing from another Locus strategy
|
||||
*
|
||||
* @param strat the shatter to convert from
|
||||
*/
|
||||
LinearLocusShardStrategy(LocusShardStrategy strat) {
|
||||
super(strat);
|
||||
this.nextShardSize = strat.nextShardSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The constructor, for a genomic list, start size, and a reference dictionary
|
||||
* @param dic the reference dictionary
|
||||
* @param startSize the starting size of the shard
|
||||
* @param lst locations to iterate from
|
||||
*/
|
||||
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
|
||||
super(dic, lst);
|
||||
this.limitingFactor = limitByCount;
|
||||
this.nextShardSize = startSize;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* set the next shards size
|
||||
*
|
||||
* @param size adjust the next size to this
|
||||
*/
|
||||
public void adjustNextShardSize(long size) {
|
||||
nextShardSize = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
protected long nextShardSize() {
|
||||
return nextShardSize;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,275 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Collections;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 6, 2009
|
||||
* Time: 11:23:17 AM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
*/
|
||||
public abstract class LocusShardStrategy implements ShardStrategy {
|
||||
|
||||
// this stores the seq dictionary, which is a reference for the
|
||||
// lengths and names of contigs, which you need to generate an iterative stratagy
|
||||
protected final SAMSequenceDictionary dic;
|
||||
|
||||
// the current genome location
|
||||
protected GenomeLoc mLoc = null;
|
||||
|
||||
// current seq location
|
||||
protected int seqLoc = 0;
|
||||
|
||||
// the actual last size; this can change based on contig endings
|
||||
protected long lastGenomeLocSize = 0;
|
||||
|
||||
// do we have another contig?
|
||||
private boolean nextContig = false;
|
||||
|
||||
/** our interal list * */
|
||||
private GenomeLocSortedSet intervals = null;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(LocusShardStrategy.class);
|
||||
|
||||
/** the number of iterations before we stop */
|
||||
protected long limitingFactor = -1;
|
||||
private boolean stopDueToLimitingFactor = false;
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
LocusShardStrategy(SAMSequenceDictionary dic) {
|
||||
this.dic = dic;
|
||||
limitingFactor = -1;
|
||||
mLoc = GenomeLocParser.createGenomeLoc(0, 0, 0);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* the copy constructor,
|
||||
*
|
||||
* @param old the old strategy
|
||||
*/
|
||||
LocusShardStrategy(LocusShardStrategy old) {
|
||||
this.dic = old.dic;
|
||||
this.mLoc = old.mLoc;
|
||||
this.seqLoc = old.seqLoc;
|
||||
this.lastGenomeLocSize = old.lastGenomeLocSize;
|
||||
this.nextContig = old.nextContig;
|
||||
this.limitingFactor = old.limitingFactor;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
* @param intervals file
|
||||
*/
|
||||
LocusShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
|
||||
this.dic = dic;
|
||||
this.intervals = intervals.clone();
|
||||
// set the starting point to the beginning interval
|
||||
if (intervals.size() < 1) {
|
||||
throw new IllegalArgumentException("Interval files must contain at least one interval");
|
||||
}
|
||||
GenomeLoc loc = intervals.iterator().next();
|
||||
mLoc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Abstract methods that each strategy has to implement
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach, adjusting this value
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
protected abstract long nextShardSize();
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Concrete methods that each strategy does not have to implement
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* get the next shard, based on the return size of nextShardSize
|
||||
*
|
||||
* @return the next shard
|
||||
*/
|
||||
public Shard next() {
|
||||
|
||||
// lets get some background info on the problem
|
||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||
long proposedSize = nextShardSize();
|
||||
long nextStart = mLoc.getStop() + 1;
|
||||
|
||||
if (this.limitingFactor > 0) {
|
||||
if (proposedSize < limitingFactor) {
|
||||
limitingFactor = limitingFactor - proposedSize;
|
||||
} else {
|
||||
proposedSize = limitingFactor;
|
||||
this.stopDueToLimitingFactor = true;
|
||||
}
|
||||
}
|
||||
|
||||
// if we don't have an interval set, use the non interval based approach. Simple, eh?
|
||||
if (this.intervals == null) {
|
||||
return nonIntervaledNext(length, proposedSize, nextStart);
|
||||
} else {
|
||||
return intervaledNext();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Interval based next processing
|
||||
*
|
||||
*
|
||||
* @return the shard that represents this data
|
||||
*/
|
||||
private Shard intervaledNext() {
|
||||
if ((this.intervals == null) || (intervals.isEmpty())) {
|
||||
throw new StingException("LocusShardStrategy: genomic regions list is empty in next() function.");
|
||||
}
|
||||
|
||||
// get the first region in the list
|
||||
GenomeLoc loc = intervals.iterator().next();
|
||||
|
||||
intervals.remove(loc);
|
||||
return new IntervalShard(loc, Shard.ShardType.LOCUS_INTERVAL);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next shard, if we don't have intervals to traverse over
|
||||
*
|
||||
* @param length the length of the contig
|
||||
* @param proposedSize the proposed size
|
||||
* @param nextStart the next start location
|
||||
*
|
||||
* @return the shard to return to the user
|
||||
*/
|
||||
private Shard nonIntervaledNext(long length, long proposedSize, long nextStart) {
|
||||
// can we fit it into the current seq size?
|
||||
if (nextStart + proposedSize - 1 < length) {
|
||||
lastGenomeLocSize = proposedSize;
|
||||
mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
|
||||
return new LocusShard(Collections.singletonList(mLoc));
|
||||
}
|
||||
// else we can't make it in the current location, we have to stitch one together
|
||||
else {
|
||||
// lets find out the remaining size of the current contig
|
||||
long overflow = nextStart + proposedSize - 1 - length;
|
||||
logger.debug("Overflow = " + overflow + " length: " + length);
|
||||
|
||||
// set our last size counter to the remaining size
|
||||
lastGenomeLocSize = proposedSize - overflow;
|
||||
|
||||
// move to the next contig
|
||||
// the next sequence should start at the begining of the next contig
|
||||
Shard ret = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1)));
|
||||
|
||||
// now jump ahead to the next contig
|
||||
jumpContig();
|
||||
|
||||
// return the shard
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/** jump to the next contig */
|
||||
private void jumpContig() {
|
||||
++seqLoc;
|
||||
|
||||
if (!(seqLoc < dic.getSequences().size())) {
|
||||
nextContig = false;
|
||||
return;
|
||||
}
|
||||
logger.debug("Next contig, index = " + dic.getSequence(seqLoc).getSequenceIndex());
|
||||
mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* is there another GenomeLoc to get?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
if (this.stopDueToLimitingFactor) {
|
||||
return false;
|
||||
}
|
||||
// if we don't have an interval file, use the non interval based approach.
|
||||
if (this.intervals == null) {
|
||||
return nextContig;
|
||||
} else {
|
||||
return (this.intervals.size() > 0);
|
||||
}
|
||||
}
|
||||
|
||||
/** we don't support remove */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* to be for-each(able), we must implement this method
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* this allows a shard strategy to get the current interval. It's kind of a hack, but for the
|
||||
* locusWindowShardStrategy it was the best approach.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected GenomeLoc getCurrentInterval() {
|
||||
if (this.intervals == null || intervals.size() < 1) {
|
||||
return null;
|
||||
}
|
||||
return intervals.iterator().next();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -31,7 +31,7 @@ import java.util.List;
|
|||
*/
|
||||
public interface Shard extends Serializable {
|
||||
enum ShardType {
|
||||
READ, LOCUS, READ_INTERVAL, LOCUS_INTERVAL
|
||||
READ, LOCUS
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
|
|
|
|||
|
|
@ -38,9 +38,6 @@ import java.io.File;
|
|||
*/
|
||||
public class ShardStrategyFactory {
|
||||
public enum SHATTER_STRATEGY {
|
||||
LINEAR,
|
||||
READS,
|
||||
INTERVAL,
|
||||
MONOLITHIC, // Put all of the available data into one shard.
|
||||
LOCUS_EXPERIMENTAL,
|
||||
READS_EXPERIMENTAL
|
||||
|
|
@ -72,12 +69,6 @@ public class ShardStrategyFactory {
|
|||
*/
|
||||
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) {
|
||||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
|
||||
case READS:
|
||||
return new ReadDelimitedReadShardStrategy(startingSize, limitByCount);
|
||||
case INTERVAL:
|
||||
throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option");
|
||||
case LOCUS_EXPERIMENTAL:
|
||||
return new IndexDelimitedLocusShardStrategy(readsDataSource,referenceDataSource,null);
|
||||
case READS_EXPERIMENTAL:
|
||||
|
|
@ -116,12 +107,6 @@ public class ShardStrategyFactory {
|
|||
*/
|
||||
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) {
|
||||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
|
||||
case INTERVAL:
|
||||
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.LOCUS_INTERVAL);
|
||||
case READS:
|
||||
return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.READ_INTERVAL);
|
||||
case LOCUS_EXPERIMENTAL:
|
||||
return new IndexDelimitedLocusShardStrategy(readsDataSource,referenceDataSource,lst);
|
||||
case READS_EXPERIMENTAL:
|
||||
|
|
|
|||
|
|
@ -1,442 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ReadDelimitedReadShard;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:36:16 PM
|
||||
* <p/>
|
||||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public class IndexDrivenSAMDataSource extends SAMDataSource {
|
||||
// used for the reads case, the last count of reads retrieved
|
||||
long readsTaken = 0;
|
||||
|
||||
// our last genome loc position
|
||||
protected GenomeLoc lastReadPos = null;
|
||||
|
||||
// do we take unmapped reads
|
||||
private boolean includeUnmappedReads = true;
|
||||
|
||||
// reads based traversal variables
|
||||
private boolean intoUnmappedReads = false;
|
||||
private int readsSeenAtLastPos = 0;
|
||||
|
||||
/**
|
||||
* A histogram of exactly what reads were removed from the input stream and why.
|
||||
*/
|
||||
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
||||
|
||||
// A pool of SAM iterators.
|
||||
private SAMResourcePool resourcePool = null;
|
||||
|
||||
private GenomeLoc mLastInterval = null;
|
||||
|
||||
/**
|
||||
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
|
||||
* @return Histogram of reads. Will not be null.
|
||||
*/
|
||||
public SAMReadViolationHistogram getViolationHistogram() {
|
||||
return violations;
|
||||
}
|
||||
|
||||
/**
|
||||
* constructor, given sam files
|
||||
*
|
||||
* @param reads the list of sam files
|
||||
*/
|
||||
public IndexDrivenSAMDataSource( Reads reads ) throws SimpleDataSourceLoadException {
|
||||
super(reads);
|
||||
resourcePool = new SAMResourcePool(reads);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do all BAM files backing this data source have an index? The case where hasIndex() is false
|
||||
* is supported, but only in a few extreme cases.
|
||||
* @return True if an index is present; false otherwise.
|
||||
*/
|
||||
public boolean hasIndex() {
|
||||
return resourcePool.hasIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the sort order of the readers.
|
||||
* @return Sort order. For this datasource, MUST be coordinate.
|
||||
*/
|
||||
public SAMFileHeader.SortOrder getSortOrder() {
|
||||
return SAMFileHeader.SortOrder.coordinate;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the (potentially merged) SAM file header.
|
||||
*
|
||||
* @return SAM file header.
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return resourcePool.getHeader();
|
||||
}
|
||||
|
||||
public SAMFileHeader getHeader(SAMReaderID id) {
|
||||
return resourcePool.fileToReaderMap.get(id.samFile).getFileHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the id of the reader which built the given read.
|
||||
* @param read The read to test.
|
||||
* @return ID of the reader.
|
||||
*/
|
||||
public SAMReaderID getReaderID(SAMRecord read) {
|
||||
if(resourcePool.readerToIDMap.containsKey(read.getFileSource().getReader()))
|
||||
return resourcePool.readerToIDMap.get(read.getFileSource().getReader());
|
||||
throw new StingException("Unable to find reader id for record.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
|
||||
* information about how they are downsampled, sorted, and filtered
|
||||
* @return
|
||||
*/
|
||||
public Reads getReadsInfo() { return reads; }
|
||||
|
||||
/** Returns true if there are read group duplicates within the merged headers. */
|
||||
public boolean hasReadGroupCollisions() {
|
||||
return resourcePool.getHeaderMerger().hasReadGroupCollisions();
|
||||
}
|
||||
|
||||
/** Returns the read group id that should be used for the input read and RG id. */
|
||||
public String getReadGroupId(final SAMReaderID id, final String originalReadGroupId) {
|
||||
SAMFileReader reader = resourcePool.getFileToReaderMapping().get(id.samFile);
|
||||
return resourcePool.getHeaderMerger().getReadGroupId(reader,originalReadGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param shard the shard to get data for
|
||||
*
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
|
||||
// setup the iterator pool if it's not setup
|
||||
boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true;
|
||||
resourcePool.setQueryOverlapping(queryOverlapping);
|
||||
|
||||
StingSAMIterator iterator = null;
|
||||
if (shard.getShardType() == Shard.ShardType.READ) {
|
||||
iterator = seekRead(shard);
|
||||
iterator = applyDecoratingIterators(true,
|
||||
iterator,
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
iterator = seekLocus(shard);
|
||||
iterator = applyDecoratingIterators(false,
|
||||
iterator,
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
} else if ((shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) ||
|
||||
(shard.getShardType() == Shard.ShardType.READ_INTERVAL)) {
|
||||
iterator = seekLocus(shard);
|
||||
iterator = applyDecoratingIterators(false,
|
||||
iterator,
|
||||
reads.getDownsamplingMethod().toFraction,
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
|
||||
// add the new overlapping detection iterator, if we have a last interval and we're a read based shard
|
||||
if(shard.getGenomeLocs().size() > 1)
|
||||
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
|
||||
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
|
||||
if(shard.getShardType() == Shard.ShardType.READ_INTERVAL) {
|
||||
if (mLastInterval != null)
|
||||
iterator = new PlusOneFixIterator(shardGenomeLoc,new IntervalOverlapIterator(iterator,mLastInterval,false));
|
||||
else
|
||||
iterator = new PlusOneFixIterator(shardGenomeLoc,iterator);
|
||||
mLastInterval = shardGenomeLoc;
|
||||
}
|
||||
} else {
|
||||
|
||||
throw new StingException("seek: Unknown shard type");
|
||||
}
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* seekLocus
|
||||
* </p>
|
||||
*
|
||||
* @param shard the shard containing the genome location to extract data for
|
||||
*
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
private StingSAMIterator seekLocus( Shard shard ) throws SimpleDataSourceLoadException {
|
||||
if(shard instanceof MonolithicShard)
|
||||
return createIterator(new EntireStream());
|
||||
|
||||
if( getHeader().getSequenceDictionary().getSequences().size() == 0 )
|
||||
throw new StingException("Unable to seek to the given locus; reads data source has no alignment information.");
|
||||
|
||||
if(shard.getGenomeLocs().size() > 1)
|
||||
throw new StingException("This SAMDataSource does not support multiple intervals within a single shard");
|
||||
GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0);
|
||||
|
||||
return createIterator( new MappedStreamSegment(shardGenomeLoc) );
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* seek
|
||||
* </p>
|
||||
*
|
||||
* @param shard the read shard to extract from
|
||||
*
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
private StingSAMIterator seekRead( Shard shard ) throws SimpleDataSourceLoadException {
|
||||
if(shard instanceof MonolithicShard)
|
||||
return createIterator(new EntireStream());
|
||||
|
||||
ReadDelimitedReadShard readShard = (ReadDelimitedReadShard)shard;
|
||||
StingSAMIterator iter = null;
|
||||
|
||||
// If there are no entries in the sequence dictionary, there can't possibly be any unmapped reads. Force state to 'unmapped'.
|
||||
if( isSequenceDictionaryEmpty() )
|
||||
intoUnmappedReads = true;
|
||||
|
||||
if (!intoUnmappedReads) {
|
||||
if (lastReadPos == null) {
|
||||
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||
iter = createIterator(new MappedStreamSegment(lastReadPos));
|
||||
return InitialReadIterator(readShard.getSize(), iter);
|
||||
} else {
|
||||
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
||||
iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
|
||||
}
|
||||
|
||||
if (intoUnmappedReads && !includeUnmappedReads)
|
||||
readShard.signalDone();
|
||||
}
|
||||
|
||||
if (intoUnmappedReads && includeUnmappedReads) {
|
||||
if (iter != null)
|
||||
iter.close();
|
||||
iter = toUnmappedReads(readShard.getSize());
|
||||
if (!iter.hasNext())
|
||||
readShard.signalDone();
|
||||
}
|
||||
|
||||
return iter;
|
||||
}
|
||||
|
||||
/**
|
||||
* If we're in by-read mode, this indicates if we want
|
||||
* to see unmapped reads too. Only seeing mapped reads
|
||||
* is much faster, but most BAM files have significant
|
||||
* unmapped read counts.
|
||||
*
|
||||
* @param seeUnMappedReads true to see unmapped reads, false otherwise
|
||||
*/
|
||||
public void viewUnmappedReads( boolean seeUnMappedReads ) {
|
||||
includeUnmappedReads = seeUnMappedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* For unit testing, add a custom iterator pool.
|
||||
*
|
||||
* @param resourcePool Custom mock iterator pool.
|
||||
*/
|
||||
void setResourcePool( SAMResourcePool resourcePool ) {
|
||||
this.resourcePool = resourcePool;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve unmapped reads.
|
||||
*
|
||||
* @param readCount how many reads to retrieve
|
||||
*
|
||||
* @return the bounded iterator that you can use to get the intervaled reads from
|
||||
*/
|
||||
StingSAMIterator toUnmappedReads( long readCount ) {
|
||||
StingSAMIterator iter = createIterator(new UnmappedStreamSegment(readsTaken, readCount));
|
||||
readsTaken += readCount;
|
||||
return iter;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A seek function for mapped reads.
|
||||
*
|
||||
* @param readCount how many reads to retrieve
|
||||
* @param iter the iterator to use, seeked to the correct start location
|
||||
*
|
||||
* @return the bounded iterator that you can use to get the intervaled reads from. Will be a zero-length
|
||||
* iterator if no reads are available.
|
||||
* @throws SimpleDataSourceLoadException
|
||||
*/
|
||||
StingSAMIterator fastMappedReadSeek( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException {
|
||||
BoundedReadIterator bound;
|
||||
correctForReadPileupSeek(iter);
|
||||
if (readsTaken == 0) {
|
||||
return InitialReadIterator(readCount, iter);
|
||||
}
|
||||
int x = 0;
|
||||
SAMRecord rec = null;
|
||||
|
||||
// Assuming that lastReadPos should never be null, because this is a mappedReadSeek
|
||||
// and initial queries are handled by the previous conditional.
|
||||
int lastContig = lastReadPos.getContigIndex();
|
||||
int lastPos = (int)lastReadPos.getStart();
|
||||
|
||||
while (x < readsTaken) {
|
||||
if (iter.hasNext()) {
|
||||
rec = iter.next();
|
||||
if (lastContig == rec.getReferenceIndex() && lastPos == rec.getAlignmentStart()) ++this.readsSeenAtLastPos;
|
||||
else this.readsSeenAtLastPos = 1;
|
||||
lastPos = rec.getAlignmentStart();
|
||||
++x;
|
||||
} else {
|
||||
iter.close();
|
||||
|
||||
// jump contigs
|
||||
lastReadPos = GenomeLocParser.toNextContig(lastReadPos);
|
||||
if (lastReadPos == null) {
|
||||
// check to see if we're using unmapped reads, if not return, we're done
|
||||
readsTaken = 0;
|
||||
intoUnmappedReads = true;
|
||||
|
||||
// fastMappedReadSeek must return an iterator, even if that iterator iterates through nothing.
|
||||
return new NullSAMIterator(reads);
|
||||
} else {
|
||||
readsTaken = readCount;
|
||||
readsSeenAtLastPos = 0;
|
||||
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
||||
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos));
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we're off the end of the last contig (into unmapped territory)
|
||||
if (rec != null && rec.getAlignmentStart() == 0) {
|
||||
readsTaken += readCount;
|
||||
intoUnmappedReads = true;
|
||||
}
|
||||
// else we're not off the end, store our location
|
||||
else if (rec != null) {
|
||||
int stopPos = rec.getAlignmentStart();
|
||||
if (stopPos < lastReadPos.getStart()) {
|
||||
lastReadPos = GenomeLocParser.createGenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
|
||||
} else {
|
||||
lastReadPos = GenomeLocParser.setStart(lastReadPos,rec.getAlignmentStart());
|
||||
}
|
||||
}
|
||||
// in case we're run out of reads, get out
|
||||
else {
|
||||
throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
|
||||
//return null;
|
||||
}
|
||||
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
|
||||
|
||||
|
||||
// return the iterator
|
||||
return bound;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the BAM file is completely unsequenced. Requires that the resource pool be initialized.
|
||||
* @return True if the sequence dictionary is completely empty. False otherwise.
|
||||
*/
|
||||
private boolean isSequenceDictionaryEmpty() {
|
||||
return getHeader().getSequenceDictionary().isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Even though the iterator has seeked to the correct location, there may be multiple reads at that location,
|
||||
* and we may have given some of them out already. Move the iterator to the correct location using the readsAtLastPos variable
|
||||
*
|
||||
* @param iter the iterator
|
||||
*/
|
||||
private void correctForReadPileupSeek( StingSAMIterator iter ) {
|
||||
// move the number of reads we read from the last pos
|
||||
boolean atLeastOneReadSeen = false; // we have a problem where some chomesomes don't have a single read (i.e. the chrN_random chrom.)
|
||||
for(int i = 0; i < this.readsSeenAtLastPos && iter.hasNext(); i++,iter.next())
|
||||
atLeastOneReadSeen = true;
|
||||
if (readsSeenAtLastPos > 0 && !atLeastOneReadSeen) {
|
||||
throw new SimpleDataSourceLoadException("Seek problem: reads at last position count != 0");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* set the initial iterator
|
||||
*
|
||||
* @param readCount the number of reads
|
||||
* @param iter the merging iterator
|
||||
*
|
||||
* @return a bounded read iterator at the first read position in the file.
|
||||
*/
|
||||
private BoundedReadIterator InitialReadIterator( long readCount, CloseableIterator<SAMRecord> iter ) {
|
||||
BoundedReadIterator bound;
|
||||
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
|
||||
this.readsTaken = readCount;
|
||||
return bound;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an iterator over the selected segment, from a resource pulled from the pool.
|
||||
* @param segment Segment over which to gather reads.
|
||||
* @return An iterator over just the reads in the given segment.
|
||||
*/
|
||||
private StingSAMIterator createIterator( DataStreamSegment segment ) {
|
||||
StingSAMIterator iterator = resourcePool.iterator(segment);
|
||||
StingSAMIterator malformedWrappedIterator = new MalformedSAMFilteringIterator( getHeader(), iterator, violations );
|
||||
StingSAMIterator readWrappingIterator = new ReadFormattingIterator(malformedWrappedIterator);
|
||||
return readWrappingIterator;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1,305 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
/**
|
||||
* Abstract class that models a current state in some category of reads.
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
abstract class ReadStreamPointer {
|
||||
/**
|
||||
* Describes the source of reads data.
|
||||
*/
|
||||
protected final Reads sourceInfo;
|
||||
|
||||
/**
|
||||
* Open handles to the reads info.
|
||||
*/
|
||||
protected final SamFileHeaderMerger headerMerger;
|
||||
|
||||
public ReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.headerMerger = headerMerger;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can this pointer access the provided segment efficiently?
|
||||
* @param segment Segment to test.
|
||||
* @return True if it would be quick for this segment to access the given data.
|
||||
* False if accessing this data would require some sort of reinitialization.
|
||||
*/
|
||||
public abstract boolean canAccessSegmentEfficiently(DataStreamSegment segment);
|
||||
|
||||
/**
|
||||
* Close this resource, destroying all file handles.
|
||||
*/
|
||||
public void close() {
|
||||
for (SAMFileReader reader : headerMerger.getReaders())
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources as well as
|
||||
* information about how they are downsampled, sorted, and filtered
|
||||
* @return
|
||||
*/
|
||||
public Reads getReadsInfo() {
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns header merger: a class that keeps the mapping between original read groups and read groups
|
||||
* of the merged stream; merger also provides access to the individual file readers (and hence headers
|
||||
* too) maintained by the system.
|
||||
* @return
|
||||
*/
|
||||
public SamFileHeaderMerger getHeaderMerger() {
|
||||
return headerMerger;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove an iterator from service.
|
||||
* @param iterator The iterator to remove from service. Must not be null.
|
||||
*/
|
||||
public abstract void destroy( StingSAMIterator iterator );
|
||||
|
||||
/**
|
||||
* Get a stream of all the reads that overlap a given segment.
|
||||
* @param segment Segment to check for overlaps.
|
||||
* @return An iterator over all reads overlapping the given segment.
|
||||
*/
|
||||
public abstract StingSAMIterator getReadsOverlapping( DataStreamSegment segment );
|
||||
|
||||
/**
|
||||
* Get a stream of all the reads that are completely contained by a given segment.
|
||||
* The segment can be mapped or unmapped.
|
||||
* @param segment Segment to check for containment..
|
||||
* @return An iterator over all reads contained by the given segment.
|
||||
*/
|
||||
public abstract StingSAMIterator getReadsContainedBy( DataStreamSegment segment );
|
||||
}
|
||||
|
||||
/**
 * Read stream pointer over the mapped portion of the input.  Backed by index queries,
 * so any mapped segment can be reached efficiently, in any order.
 */
class MappedReadStreamPointer extends ReadStreamPointer {

    public MappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
        super( sourceInfo, headerMerger );
    }

    /**
     * MappedReadStreamPointers can access any segment efficiently.  Always return true.
     * @param segment Segment to test.
     * @return True.
     */
    public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
        return true;
    }

    /**
     * {@inheritDoc}
     * Each request builds its own merging iterator, so closing it releases the request's resources.
     */
    public void destroy( StingSAMIterator iterator ) {
        iterator.close();
    }


    /**
     * {@inheritDoc}
     * @throws UnsupportedOperationException if the segment is not a MappedStreamSegment.
     */
    @Override
    public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) {
        if(!(segment instanceof MappedStreamSegment))
            throw new UnsupportedOperationException("MappedReadStreamPointer cannot get reads overlapping an unmapped stream segment");
        MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;

        MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );

        // The getStop() + 1 is a hack to work around an old bug in the way Picard created SAM files where queries
        // over a given interval would occasionally not pick up the last read in that interval.
        mergingIterator.queryOverlapping( mappedSegment.locus.getContig(),
                                      (int)mappedSegment.locus.getStart(),
                                      (int)mappedSegment.locus.getStop()+1);

        return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
    }

    /**
     * {@inheritDoc}
     * @throws StingException if the segment is not a MappedStreamSegment.
     */
    @Override
    public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
        if( !(segment instanceof MappedStreamSegment) )
            throw new StingException("Trying to access unmapped content from a mapped read stream pointer");
        MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
        MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
        // NOTE(review): the original comment here claimed the queryOverlapping '+1' hack was
        // explicitly NOT used for contained queries, but the call below still passes
        // getStop()+1 — comment and code disagreed; confirm which behavior is intended.
        mergingIterator.queryContained( mappedSegment.locus.getContig(),
                                    (int)mappedSegment.locus.getStart(),
                                    (int)mappedSegment.locus.getStop()+1);
        return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
    }

    /**
     * Convert a mapped read stream pointer to an unmapped read stream pointer, transferring ownership
     * of the underlying file handles to the new container.
     * After doing this conversion, the source MappedReadStreamPointer should not be used.
     * @return a new UnmappedReadStreamPointer sharing this pointer's source info and header merger.
     */
    public UnmappedReadStreamPointer toUnmappedReadStreamPointer() {
        return new UnmappedReadStreamPointer( sourceInfo, headerMerger );
    }
}
|
||||
|
||||
class UnmappedReadStreamPointer extends ReadStreamPointer {
|
||||
/**
|
||||
* A pointer to the current position of this iterator in the read stream.
|
||||
*/
|
||||
private PositionTrackingIterator unmappedIterator = null;
|
||||
|
||||
public UnmappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
|
||||
super( sourceInfo, headerMerger );
|
||||
|
||||
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
||||
mergingIterator.queryUnmappedReads();
|
||||
unmappedIterator = new PositionTrackingIterator( sourceInfo, mergingIterator, 0L );
|
||||
}
|
||||
|
||||
/**
|
||||
* UnmappedReadStreamPointers are streams and can therefore access 'future' reads in the file quickly,
|
||||
* but reads already seen are difficult to seek to.
|
||||
* @param segment Segment to test.
|
||||
* @return True if this DataStreamSegment follows the current position.
|
||||
*/
|
||||
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
|
||||
if( !(segment instanceof UnmappedStreamSegment) )
|
||||
return false;
|
||||
return unmappedIterator.getPosition() <= ((UnmappedStreamSegment)segment).position;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) {
|
||||
throw new UnsupportedOperationException("Unable to determine overlapped reads of an unmapped segment");
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
||||
if( !(segment instanceof UnmappedStreamSegment) )
|
||||
throw new StingException("Trying to access mapped content from an unmapped read stream pointer");
|
||||
|
||||
UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;
|
||||
|
||||
// Force the iterator to the next pending position.
|
||||
while(unmappedIterator.getPosition() < unmappedSegment.position)
|
||||
unmappedIterator.next();
|
||||
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(sourceInfo,unmappedIterator), unmappedSegment.size);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
if( unmappedIterator != null )
|
||||
unmappedIterator.close();
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public void destroy( StingSAMIterator iterator ) {
|
||||
// Don't destroy the iterator; reuse it.
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class EntireReadStreamPointer extends ReadStreamPointer {
|
||||
/**
|
||||
* Create a new pointer that can return info about the entire read stream.
|
||||
* @param sourceInfo Source info for the reads.
|
||||
* @param headerMerger Header merging apparatus.
|
||||
*
|
||||
*/
|
||||
public EntireReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
|
||||
super( sourceInfo, headerMerger );
|
||||
}
|
||||
|
||||
/**
|
||||
* An EntireReadStreamPointer can only efficiently access the entire file.
|
||||
* @param segment Segment to test.
|
||||
* @return true if requesting the entire stream.
|
||||
*/
|
||||
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
|
||||
return segment instanceof EntireStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a stream of all the reads that overlap a given segment.
|
||||
* @param segment Segment to check for overlaps.
|
||||
* @return An iterator over all reads overlapping the given segment.
|
||||
*/
|
||||
@Override
|
||||
public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) {
|
||||
if(!(segment instanceof EntireStream))
|
||||
throw new StingException("EntireReadStreamPointer can only get reads overlapping the entire stream.");
|
||||
return StingSAMIteratorAdapter.adapt(sourceInfo,new MergingSamRecordIterator2(headerMerger, sourceInfo));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a stream of all the reads that are completely contained by a given segment.
|
||||
* @param segment Segment to check for containment..
|
||||
* @return An iterator over all reads contained by the given segment.
|
||||
*/
|
||||
@Override
|
||||
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
||||
if(!(segment instanceof EntireStream))
|
||||
throw new StingException("EntireReadStreamPointer can only get reads contained by the entire stream.");
|
||||
return StingSAMIteratorAdapter.adapt(sourceInfo,new MergingSamRecordIterator2(headerMerger, sourceInfo));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public void destroy( StingSAMIterator iterator ) {
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,196 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Represents a single stream of read data. Used to represent the state of the stream and determine
|
||||
* whether the state of this resource is such that it can field the desired query.
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
class ReadStreamResource {
|
||||
final static boolean eagerDecode = true;
|
||||
|
||||
/**
|
||||
* Do all the constituent components of this ReadStreamResource have indices?
|
||||
* In general, BAM files without indices are not supported, but in a few specific
|
||||
* cases we do allow this for the Picard pipeline.
|
||||
* @return true if all BAM files have indices; false otherwise.
|
||||
*/
|
||||
protected boolean hasIndex() {
|
||||
for(SAMFileReader reader: readStreamPointer.getHeaderMerger().getReaders()) {
|
||||
if(!reader.hasIndex())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected final boolean hasIndex;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(ReadStreamPointer.class);
|
||||
|
||||
/**
|
||||
* The (possibly merged) header for the input fileset.
|
||||
*/
|
||||
private final SAMFileHeader header;
|
||||
|
||||
/**
|
||||
* A pointer to the current location of the file.
|
||||
*/
|
||||
private ReadStreamPointer readStreamPointer = null;
|
||||
|
||||
/**
|
||||
* A mapping from original input file to merged read group record ids
|
||||
*/
|
||||
private Map<File, SAMFileReader> fileToReaderMap = null;
|
||||
|
||||
/**
|
||||
* A mapping from reader back to the ID uniquely identifying this input file.
|
||||
*/
|
||||
private Map<SAMFileReader, SAMReaderID> readerToIDMap = null;
|
||||
|
||||
public ReadStreamResource( Reads sourceInfo ) {
|
||||
SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
|
||||
|
||||
this.header = headerMerger.getMergedHeader();
|
||||
|
||||
boolean indexPresent = true;
|
||||
for(SAMFileReader reader: headerMerger.getReaders()) {
|
||||
if(!reader.hasIndex())
|
||||
indexPresent = false;
|
||||
}
|
||||
hasIndex = indexPresent;
|
||||
|
||||
if(hasIndex)
|
||||
readStreamPointer = new MappedReadStreamPointer(sourceInfo, headerMerger);
|
||||
else
|
||||
readStreamPointer = new EntireReadStreamPointer(sourceInfo, headerMerger);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header information for the read stream.
|
||||
* @return Header information for the read stream.
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources as well as
|
||||
* information about how they are downsampled, sorted, and filtered
|
||||
* @return the Reads object
|
||||
*/
|
||||
public Reads getReadsInfo() { return readStreamPointer.getReadsInfo(); }
|
||||
|
||||
/**
|
||||
* Returns header merger: a class that keeps the mapping between original read groups and read groups
|
||||
* of the merged stream; merger also provides access to the individual file readers (and hence headers
|
||||
* too) maintained by the system.
|
||||
* @return the header merger
|
||||
*/
|
||||
public SamFileHeaderMerger getHeaderMerger() { return readStreamPointer.getHeaderMerger(); }
|
||||
|
||||
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
|
||||
return readStreamPointer.canAccessSegmentEfficiently(segment);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
readStreamPointer.close();
|
||||
}
|
||||
|
||||
public void destroy( StingSAMIterator iterator ) {
|
||||
readStreamPointer.destroy(iterator);
|
||||
}
|
||||
|
||||
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
||||
if( readStreamPointer instanceof MappedReadStreamPointer && segment instanceof UnmappedStreamSegment )
|
||||
readStreamPointer = ((MappedReadStreamPointer)readStreamPointer).toUnmappedReadStreamPointer();
|
||||
return readStreamPointer.getReadsContainedBy(segment);
|
||||
}
|
||||
|
||||
public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) {
|
||||
return readStreamPointer.getReadsOverlapping(segment);
|
||||
}
|
||||
|
||||
public Map<File, SAMFileReader> getFileToReaderMapping() {
|
||||
return fileToReaderMap;
|
||||
}
|
||||
|
||||
public Map<SAMFileReader,SAMReaderID> getReaderToIDMapping() {
|
||||
return readerToIDMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* A private function that, given the internal file list, generates a merging construct for
|
||||
* all available files.
|
||||
* @param reads source information about the reads.
|
||||
* @param SORT_ORDER sort order for the reads.
|
||||
* @return a list of SAMFileReaders that represent the stored file names
|
||||
* @throws SimpleDataSourceLoadException if the file cannot be opened.
|
||||
*/
|
||||
private SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER )
|
||||
throws SimpleDataSourceLoadException {
|
||||
|
||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
|
||||
fileToReaderMap = new HashMap<File, SAMFileReader>();
|
||||
readerToIDMap = new HashMap<SAMFileReader,SAMReaderID>();
|
||||
for (File f : reads.getReadsFiles()) {
|
||||
SAMFileReader reader = new SAMFileReader(f, eagerDecode);
|
||||
fileToReaderMap.put(f, reader);
|
||||
readerToIDMap.put(reader,new SAMReaderID(f));
|
||||
reader.setValidationStringency(reads.getValidationStringency());
|
||||
|
||||
final SAMFileHeader header = reader.getFileHeader();
|
||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||
|
||||
if (reader.getFileHeader().getReadGroups().size() < 1) {
|
||||
//logger.warn("Setting header in reader " + f.getName());
|
||||
SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName());
|
||||
rec.setLibrary(f.getName());
|
||||
rec.setSample(f.getName());
|
||||
|
||||
reader.getFileHeader().addReadGroup(rec);
|
||||
}
|
||||
|
||||
lst.add(reader);
|
||||
}
|
||||
|
||||
return new SamFileHeaderMerger(lst,SORT_ORDER,true);
|
||||
}
|
||||
}
|
||||
|
|
@ -167,18 +167,6 @@ public abstract class SAMDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public abstract StingSAMIterator seek(Shard shard);
|
||||
|
||||
/**
|
||||
* If we're in by-read mode, this indicates if we want
|
||||
* to see unmapped reads too. Only seeing mapped reads
|
||||
* is much faster, but most BAM files have significant
|
||||
* unmapped read counts.
|
||||
*
|
||||
* @param seeUnMappedReads true to see unmapped reads, false otherwise
|
||||
*/
|
||||
public void viewUnmappedReads( boolean seeUnMappedReads ) {
|
||||
includeUnmappedReads = seeUnMappedReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter reads based on user-specified criteria.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Maintain a pool of resources of accessors to SAM read data. SAMFileReaders and
|
||||
* headers are actually quite expensive to open, so this class manages the mechanics
|
||||
* of keeping them open and reusing them.
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@Deprecated
|
||||
class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(SAMResourcePool.class);
|
||||
|
||||
/** Source information about the reads. */
|
||||
protected Reads reads;
|
||||
protected SamFileHeaderMerger headerMerger;
|
||||
protected Map<File, SAMFileReader> fileToReaderMap;
|
||||
protected Map<SAMFileReader, SAMReaderID> readerToIDMap;
|
||||
|
||||
/**
|
||||
* Do all the constituent BAM files have indices? We support some very limited
|
||||
* cases where not all BAM files have indices available.
|
||||
*/
|
||||
protected final boolean hasIndex;
|
||||
|
||||
/** Is this a by-reads traversal or a by-locus? */
|
||||
protected boolean queryOverlapping;
|
||||
|
||||
/** File header for the combined file. */
|
||||
protected final SAMFileHeader header;
|
||||
|
||||
public SAMResourcePool( Reads reads ) {
|
||||
this.reads = reads;
|
||||
this.queryOverlapping = true;
|
||||
|
||||
ReadStreamResource streamResource = createNewResource();
|
||||
this.header = streamResource.getHeader();
|
||||
this.headerMerger = streamResource.getHeaderMerger();
|
||||
this.hasIndex = streamResource.hasIndex();
|
||||
this.fileToReaderMap = streamResource.getFileToReaderMapping();
|
||||
this.readerToIDMap = streamResource.getReaderToIDMapping();
|
||||
|
||||
// Add this resource to the pool.
|
||||
this.addNewResource(streamResource);
|
||||
}
|
||||
|
||||
/** Get the combined header for all files in the iterator pool. */
|
||||
public SAMFileHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
|
||||
* information about how they are downsampled, sorted, and filtered
|
||||
* @return
|
||||
*/
|
||||
public Reads getReadsInfo() { return reads; }
|
||||
|
||||
/**
|
||||
* Returns a mapping from original input files to the SAMFileReaders
|
||||
*
|
||||
* @return the mapping
|
||||
*/
|
||||
public Map<File, SAMFileReader> getFileToReaderMapping() {
|
||||
return fileToReaderMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns header merger: a class that keeps the mapping between original read groups and read groups
|
||||
* of the merged stream; merger also provides access to the individual file readers (and hence headers
|
||||
* too) maintained by the system.
|
||||
* @return
|
||||
*/
|
||||
public SamFileHeaderMerger getHeaderMerger() { return headerMerger; }
|
||||
|
||||
protected ReadStreamResource selectBestExistingResource( DataStreamSegment segment, List<ReadStreamResource> resources ) {
|
||||
for (ReadStreamResource resource : resources) {
|
||||
if (resource.canAccessSegmentEfficiently(segment)) {
|
||||
return resource;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected ReadStreamResource createNewResource() {
|
||||
return new ReadStreamResource(reads);
|
||||
}
|
||||
|
||||
protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamResource streamResource ) {
|
||||
StingSAMIterator iterator = null;
|
||||
|
||||
if (!queryOverlapping)
|
||||
iterator = streamResource.getReadsContainedBy(segment);
|
||||
else
|
||||
iterator = streamResource.getReadsOverlapping(segment);
|
||||
|
||||
return new ReleasingIterator( streamResource, iterator );
|
||||
}
|
||||
|
||||
protected void closeResource( ReadStreamResource resource ) {
|
||||
resource.close();
|
||||
}
|
||||
|
||||
private class ReleasingIterator implements StingSAMIterator {
|
||||
/**
|
||||
* The resource acting as the source of the data.
|
||||
*/
|
||||
private final ReadStreamResource resource;
|
||||
|
||||
/**
|
||||
* The iterator to wrap.
|
||||
*/
|
||||
private final StingSAMIterator wrappedIterator;
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
return wrappedIterator.getSourceInfo();
|
||||
}
|
||||
|
||||
public ReleasingIterator( ReadStreamResource resource, StingSAMIterator wrapped ) {
|
||||
this.resource = resource;
|
||||
this.wrappedIterator = wrapped;
|
||||
}
|
||||
|
||||
public ReleasingIterator iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
||||
}
|
||||
|
||||
public void close() {
|
||||
resource.destroy(wrappedIterator);
|
||||
release(this);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isQueryOverlapping() {
|
||||
return queryOverlapping;
|
||||
}
|
||||
|
||||
public void setQueryOverlapping( boolean queryOverlapping ) {
|
||||
this.queryOverlapping = queryOverlapping;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -57,7 +57,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
|
||||
for (Shard shard : shardStrategy) {
|
||||
// New experimental code for managing locus intervals.
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) {
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(getReadIterator(shard), shard.getGenomeLocs(), walker.getMandatoryReadFilters(), lWalker.getDiscards());
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.sam.MergingSamRecordIterator;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -87,8 +88,8 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
public SAMFileHeader getHeader() {
|
||||
// todo: this is bad, we need an iterface out there for samrecords that supports getting the header,
|
||||
// regardless of the merging
|
||||
if (iterator instanceof MergingSamRecordIterator2)
|
||||
return ((MergingSamRecordIterator2)iterator).getHeader();
|
||||
if (iterator instanceof MergingSamRecordIterator)
|
||||
return ((MergingSamRecordIterator)iterator).getMergedHeader();
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,113 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class DuplicateDetectorIterator
|
||||
*
|
||||
* remove reads that overlap the passed in interval, yea!
|
||||
*
|
||||
*/
|
||||
public class IntervalOverlapIterator implements StingSAMIterator {
|
||||
|
||||
// our wrapped iterator
|
||||
private final StingSAMIterator mIter;
|
||||
private final boolean throwException;
|
||||
|
||||
// storage for the next record
|
||||
private SAMRecord mNextRecord = null;
|
||||
|
||||
// the genomic location which we filter on
|
||||
private final GenomeLoc mLoc;
|
||||
|
||||
/**
|
||||
* Create a DuplicateDetectorIterator from another sam iterator
|
||||
* @param iter something that implements StingSAMIterator
|
||||
* @param blowUpOnDup if we find a dup, do we throw an exception (blow up) or do we drop it
|
||||
*/
|
||||
public IntervalOverlapIterator(StingSAMIterator iter, GenomeLoc filterLocation, boolean blowUpOnDup) {
|
||||
this.mIter = iter;
|
||||
this.throwException = blowUpOnDup;
|
||||
this.mLoc = filterLocation;
|
||||
if (iter.hasNext()) {
|
||||
next();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets source information for the reads. Contains information about the original reads
|
||||
* files, plus information about downsampling, etc.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
return mIter.getSourceInfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* close this iterator
|
||||
*/
|
||||
public void close() {
|
||||
if (mIter != null) mIter.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have a next?
|
||||
* @return true if yes, false if not
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return (mNextRecord != null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the next record
|
||||
* @return a SAMRecord
|
||||
*/
|
||||
public SAMRecord next() {
|
||||
SAMRecord ret = mNextRecord;
|
||||
while (mIter.hasNext()) {
|
||||
mNextRecord = mIter.next();
|
||||
if (!isOverlaping(mNextRecord)) return ret;
|
||||
}
|
||||
mNextRecord = null;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* not supported
|
||||
*/
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("You can't call remove, so, like, I guess please don't");
|
||||
}
|
||||
|
||||
/**
|
||||
* create an iterator out of the this type
|
||||
* @return this!
|
||||
*/
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* determine if a read overlaps the specified interval that was passed in
|
||||
* @param rec the read
|
||||
* @return true if it overlaps, false otherwise
|
||||
*/
|
||||
private boolean isOverlaping(SAMRecord rec) {
|
||||
boolean overlap = mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec));
|
||||
if (overlap && this.throwException)
|
||||
throw new StingException("IntervalOverlapIterator found a overlapping read " + rec.getReadName() + " with overlap " + this.mLoc.toString());
|
||||
return overlap;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -43,337 +43,6 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Provides an iterator interface for merging multiple underlying iterators into a single
|
||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord>, QueryIterator {
|
||||
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
||||
protected final SamFileHeaderMerger samHeaderMerger;
|
||||
protected final SAMFileHeader.SortOrder sortOrder;
|
||||
protected static Logger logger = Logger.getLogger(MergingSamRecordIterator2.class);
|
||||
private SAMRecord mNextRecord;
|
||||
protected boolean initialized = false;
|
||||
protected final Reads reads;
|
||||
protected static boolean warnedUserAboutSortOrder = false; // so we only warn the user once
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
*
|
||||
* @param headerMerger the header to merge
|
||||
* @param reads the reads pile
|
||||
*/
|
||||
public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) {
|
||||
this.samHeaderMerger = headerMerger;
|
||||
this.reads = reads;
|
||||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(samHeaderMerger.getReaders().size());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* we hold off initializing because we don't know if they plan to query or just iterate. This function
|
||||
* gets called if the hasNext() determines we haven't been query-ed yet, and we want to setup the iterator to start
|
||||
* at the beginning of the read pile
|
||||
*/
|
||||
private void lazyInitialization() {
|
||||
if (initialized) {
|
||||
throw new UnsupportedOperationException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
checkSortOrder(reader);
|
||||
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
}
|
||||
setInitialized();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* verify the sort order
|
||||
*
|
||||
* @param reader the reader to check
|
||||
*/
|
||||
private void checkSortOrder(SAMFileReader reader) {
|
||||
if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) {
|
||||
String msg = String.format("The GATK requires your bam have %s sort order, but your BAM file header %s. Continuing beyond this point is unsafe" +
|
||||
"-- please update your BAM file to have a compatible sort order using samtools sort or Picard MergeBamFiles. You may overcome this error by " +
|
||||
"passing the \'-U ALLOW_UNSET_BAM_SORT_ORDER\' on the command line; before doing so make sure that your input BAM is sorted even if it can't be marked as such.",
|
||||
this.sortOrder, reader.getFileHeader().getAttribute("SO") == null ? "is missing the SO sort order flag" : "has an SO flag set to " + reader.getFileHeader().getAttribute("SO"));
|
||||
if (!reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.ALLOW_UNSET_BAM_SORT_ORDER)) {
|
||||
throw new PicardException(msg);
|
||||
} else if (!warnedUserAboutSortOrder) {
|
||||
warnedUserAboutSortOrder = true;
|
||||
Utils.warnUser(msg);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public boolean supportsSeeking() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void queryOverlapping(final String contig, final int start, final int stop) {
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
checkSortOrder(reader);
|
||||
Iterator<SAMRecord> recordIter = reader.queryOverlapping(contig, start, stop);
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
}
|
||||
setInitialized();
|
||||
|
||||
}
|
||||
|
||||
public void query(final String contig, final int start, final int stop, final boolean contained) {
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
checkSortOrder(reader);
|
||||
Iterator<SAMRecord> recordIter = reader.query(contig, start, stop, contained);
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
}
|
||||
setInitialized();
|
||||
|
||||
}
|
||||
|
||||
public void queryUnmappedReads() {
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
Iterator<SAMRecord> recordIter = null;
|
||||
if (reader.hasIndex()) {
|
||||
recordIter = reader.queryUnmapped();
|
||||
} else {
|
||||
// HACK: Supporting completely unmapped BAM files is easy. Let's do a quick check to make sure
|
||||
// these BAMs aren't partially indexed.
|
||||
if (reader.getFileHeader().getSequenceDictionary().size() > 0)
|
||||
throw new StingException("Partially mapped BAM files without indices are not supported");
|
||||
recordIter = reader.iterator();
|
||||
}
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
}
|
||||
setInitialized();
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void queryContained(final String contig, final int start, final int stop) {
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
checkSortOrder(reader);
|
||||
Iterator<SAMRecord> recordIter = reader.queryContained(contig, start, stop);
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
}
|
||||
setInitialized();
|
||||
|
||||
}
|
||||
|
||||
private void setInitialized() {
|
||||
initialized = true;
|
||||
mNextRecord = nextRecord();
|
||||
}
|
||||
|
||||
|
||||
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||
public boolean hasNext() {
|
||||
if (!initialized) {
|
||||
lazyInitialization();
|
||||
}
|
||||
if (this.pq.isEmpty() && mNextRecord == null) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
SAMRecord r = mNextRecord;
|
||||
if (!this.pq.isEmpty()) {
|
||||
mNextRecord = nextRecord();
|
||||
} else {
|
||||
mNextRecord = null;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/** @return the next record, without moving the iterator */
|
||||
public SAMRecord peek() {
|
||||
return mNextRecord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next record from the top most iterator during merging.
|
||||
*
|
||||
* @return the next record, null if it's unavailable
|
||||
*/
|
||||
public SAMRecord nextRecord() {
|
||||
if (!initialized) {
|
||||
lazyInitialization();
|
||||
}
|
||||
|
||||
final ComparableSamRecordIterator iterator = this.pq.poll();
|
||||
|
||||
if (iterator == null) {
|
||||
return null;
|
||||
}
|
||||
final SAMRecord record = iterator.next();
|
||||
addIfNotEmpty(iterator);
|
||||
|
||||
// Fix the read group if needs be
|
||||
if (this.samHeaderMerger.hasReadGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader(), oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the program group if needs be
|
||||
if (this.samHeaderMerger.hasProgramGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader(), oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// if we don't have a read group, set one correctly
|
||||
if (record.getAttribute(SAMTag.RG.toString()) == null) {
|
||||
List<SAMReadGroupRecord> readGroups = iterator.getReader().getFileHeader().getReadGroups();
|
||||
if (readGroups.size() == 1) {
|
||||
record.setAttribute(SAMTag.RG.toString(), readGroups.get(0).getReadGroupId());
|
||||
record.setAttribute(SAMTag.SM.toString(), readGroups.get(0).getReadGroupId());
|
||||
} else {
|
||||
logger.warn("Unable to set read group of ungrouped read: unable to pick default group, there are " + readGroups.size() + " possible.");
|
||||
}
|
||||
}
|
||||
|
||||
record.setHeader(samHeaderMerger.getMergedHeader());
|
||||
|
||||
//System.out.printf("NEXT = %s %s %d%n", record.getReadName(), record.getReferenceName(), record.getAlignmentStart());
|
||||
//System.out.printf("PEEK = %s %s %d%n", this.pq.peek().peek().getReadName(), this.pq.peek().peek().getReferenceName(), this.pq.peek().peek().getAlignmentStart());
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds iterator to priority queue. If the iterator has more records it is added
|
||||
* otherwise it is closed and not added.
|
||||
*
|
||||
* @param iterator the iterator to add
|
||||
*/
|
||||
protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
|
||||
//System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart());
|
||||
if (iterator.hasNext()) {
|
||||
pq.offer(iterator);
|
||||
} else {
|
||||
iterator.close();
|
||||
}
|
||||
}
|
||||
|
||||
/** Unsupported operation. */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the right comparator for a given sort order (coordinate, alphabetic). In the
|
||||
* case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
|
||||
* ordering.
|
||||
*
|
||||
* @return the SAMRecordComparator appropriate for our sort order
|
||||
*/
|
||||
protected SAMRecordComparator getComparator() {
|
||||
// For unsorted build a fake comparator that compares based on object ID
|
||||
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
||||
return new SAMRecordComparator() {
|
||||
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
||||
}
|
||||
|
||||
public int compare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return fileOrderCompare(lhs, rhs);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Otherwise try and figure out what kind of comparator to return and build it
|
||||
final Class<? extends SAMRecordComparator> type = this.sortOrder.getComparator();
|
||||
|
||||
try {
|
||||
final Constructor<? extends SAMRecordComparator> ctor = type.getConstructor(SAMFileHeader.class);
|
||||
//System.out.printf("Getting comparator %s%n", ctor.toGenericString());
|
||||
return ctor.newInstance(this.samHeaderMerger.getMergedHeader());
|
||||
}
|
||||
catch (Exception e) {
|
||||
try {
|
||||
final Constructor<? extends SAMRecordComparator> ctor = type.getConstructor();
|
||||
return ctor.newInstance();
|
||||
}
|
||||
catch (Exception e2) {
|
||||
throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the merged header that the merging iterator is working from.
|
||||
*
|
||||
* @return our sam file header
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return this.samHeaderMerger.getMergedHeader();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* closes all open iterators....DO THIS or you will run out of handles
|
||||
* with sharding.
|
||||
*/
|
||||
public void close() {
|
||||
for (ComparableSamRecordIterator iterator : pq)
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* allows us to be used in the new style for loops
|
||||
*
|
||||
* @return this iterator, which can be used in for loop each iterations
|
||||
*/
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets source information for the reads. Contains information about the original reads
|
||||
* files, plus information about downsampling, etc.
|
||||
*
|
||||
* @return the read object we were created for
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
return this.reads;
|
||||
}
|
||||
}
|
||||
|
||||
// Should replace picard class with the same name
|
||||
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
|
||||
private Reads sourceInfo;
|
||||
|
|
|
|||
|
|
@ -1,106 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* Date: Nov 4, 2009
|
||||
* Time: 10:41:02 PM
|
||||
*/
|
||||
public class PlusOneFixIterator implements StingSAMIterator {
|
||||
/**
|
||||
* our interval region
|
||||
*/
|
||||
private final GenomeLoc mInterval;
|
||||
|
||||
/**
|
||||
* our iterator
|
||||
*/
|
||||
private final StingSAMIterator mIterator;
|
||||
|
||||
/**
|
||||
* our next record
|
||||
*/
|
||||
private SAMRecord mNextRecord;
|
||||
|
||||
/**
|
||||
* This iterator filters all reads that have their start < PLUS_ONE_FIX_CONSTANT from the end of the
|
||||
* interval:
|
||||
* <p/>
|
||||
* <p/>
|
||||
* (this won't make sense in a non-fixed width font)
|
||||
* --------------
|
||||
* interval |
|
||||
* ------------------------
|
||||
* | good read|
|
||||
* --------------------------------------------
|
||||
* | overlap < PLUS_ONE_FIX_CONSTANT, we drop|
|
||||
* -------------------------------------------
|
||||
* <p/>
|
||||
* The problem is that we had to put a +1 on our queryOverlapping() stop position value to SAMTools
|
||||
* due to an indexing problem. Other iterators don't know about this adjustment though, so calculations
|
||||
* like overlap fail. This iterator eliminates them before the read is seen by other iterators.
|
||||
* <p/>
|
||||
* create a plus one fix iterator, given:
|
||||
*
|
||||
* @param inteveral the interval we're checking
|
||||
* @param iterator the iterator to draw reads from
|
||||
*/
|
||||
public PlusOneFixIterator(GenomeLoc inteveral, StingSAMIterator iterator) {
|
||||
if (inteveral == null || iterator == null)
|
||||
throw new StingException("Both parameters to PlusOneFixIterator have to != null");
|
||||
|
||||
// save off the iterator and the interval
|
||||
mInterval = inteveral;
|
||||
mIterator = iterator;
|
||||
|
||||
// pop off the first read
|
||||
if (mIterator.hasNext()) {
|
||||
next();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (mNextRecord != null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SAMRecord next() {
|
||||
SAMRecord ret = mNextRecord;
|
||||
while (mIterator.hasNext()) {
|
||||
mNextRecord = mIterator.next();
|
||||
if (!(mNextRecord.getAlignmentStart() > mInterval.getStop())) return ret;
|
||||
}
|
||||
mNextRecord = null;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("REMOVE on PlusOneFixIterator is not supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reads getSourceInfo() {
|
||||
return this.mIterator.getSourceInfo();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
this.mIterator.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMRecordComparator;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
*
|
||||
* This interface indicates that the iterator is query able
|
||||
*/
|
||||
public interface QueryIterator extends StingSAMIterator, PeekingIterator<SAMRecord> {
|
||||
|
||||
/**
|
||||
* The required methods to query able
|
||||
**/
|
||||
public void queryOverlapping(final String contig, final int start, final int stop);
|
||||
public void query(final String contig, final int start, final int stop, final boolean contained);
|
||||
public void queryUnmappedReads();
|
||||
public void queryContained(final String contig, final int start, final int stop);
|
||||
}
|
||||
|
|
@ -7,7 +7,6 @@ import net.sf.samtools.SAMRecord;
|
|||
import java.util.List;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -42,7 +41,7 @@ import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
|||
* to test out classes that use specific itervals. The reads returned will
|
||||
* all lie in order in the specified interval.
|
||||
*/
|
||||
public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements QueryIterator {
|
||||
public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
|
||||
|
||||
// get the next positon
|
||||
protected int finalPos = 0;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -203,7 +203,7 @@ public class ArtificialSAMUtils {
|
|||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static QueryIterator mappedReadIterator( int startingChr, int endingChr, int readCount ) {
|
||||
public static StingSAMIterator mappedReadIterator( int startingChr, int endingChr, int readCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
|
||||
|
|
@ -219,7 +219,7 @@ public class ArtificialSAMUtils {
|
|||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static ArtificialSAMIterator mappedAndUnmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||
public static StingSAMIterator mappedAndUnmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||
|
|
@ -250,7 +250,7 @@ public class ArtificialSAMUtils {
|
|||
*
|
||||
* @return StingSAMIterator representing the specified amount of fake data
|
||||
*/
|
||||
public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||
public static StingSAMIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
|
||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||
|
|
|
|||
|
|
@ -1,149 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.Before;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadIntervalShardStrategyUnitTest
|
||||
* <p/>
|
||||
* Tests the ReadIntervalShardStrategy class
|
||||
*
|
||||
*
|
||||
* TODO: this test has been changed, since we now force interval shards to not subdivide if they're large,
|
||||
* so you should always get one shard back, reguardless of the specified length. If this behavior changes
|
||||
* back in the future, the expected number of shards should change from 1 to > 1.
|
||||
*
|
||||
*/
|
||||
public class IntervalShardStrategyUnitTest extends BaseTest {
|
||||
|
||||
private GenomeLocSortedSet mSortedSet = null;
|
||||
private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||
private static final int STARTING_CHROMOSOME = 1;
|
||||
private static final int CHROMOSOME_SIZE = 1000;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
mSortedSet = new GenomeLocSortedSet();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoExceptionOnEmpty() {
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleChromosomeFunctionality() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL);
|
||||
int counter = 0;
|
||||
Shard d = null;
|
||||
while (strat.hasNext()) {
|
||||
d = strat.next();
|
||||
counter++;
|
||||
}
|
||||
assertTrue(d instanceof IntervalShard);
|
||||
assertEquals(1, counter);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleChromosomeFunctionality() {
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
}
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL);
|
||||
int counter = 0;
|
||||
Shard d = null;
|
||||
while (strat.hasNext()) {
|
||||
d = strat.next();
|
||||
counter++;
|
||||
}
|
||||
assertTrue(d instanceof IntervalShard);
|
||||
assertEquals(5, counter);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOddSizeShardFunctionality() {
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
}
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet,Shard.ShardType.LOCUS_INTERVAL);
|
||||
int counter = 0;
|
||||
while (strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertEquals(1,d.getGenomeLocs().size());
|
||||
assertEquals(1, d.getGenomeLocs().get(0).getStart());
|
||||
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
|
||||
counter++;
|
||||
}
|
||||
assertEquals(5, counter);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testInfiniteShardSize() {
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
}
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet,Shard.ShardType.LOCUS_INTERVAL);
|
||||
int counter = 0;
|
||||
while (strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertEquals(1,d.getGenomeLocs().size());
|
||||
assertEquals(1000, d.getGenomeLocs().get(0).getStop());
|
||||
counter++;
|
||||
}
|
||||
assertEquals(5, counter);
|
||||
}
|
||||
|
||||
@Test(expected = UnsupportedOperationException.class)
|
||||
public void testRemove() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL);
|
||||
strat.remove();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class IntervalReadShardTest
|
||||
* <p/>
|
||||
* Tests for the IntervalReadShard class.
|
||||
*/
|
||||
public class IntervalShardUnitTest extends BaseTest {
|
||||
|
||||
private IntervalShard intervalShard = null;
|
||||
private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||
private static final int STARTING_CHROMOSOME = 1;
|
||||
private static final int CHROMOSOME_SIZE = 1000;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void simpleReturn() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
|
||||
assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
|
||||
assertTrue(intervalShard.getGenomeLocs().get(0).equals(loc));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ensureNotReference() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL);
|
||||
assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size());
|
||||
assertTrue(intervalShard.getGenomeLocs().get(0) != loc && intervalShard.getGenomeLocs().get(0).equals(loc));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,122 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class LocusShardStrategyTest
|
||||
* <p/>
|
||||
* Test for the Locus Shard Strategy
|
||||
*/
|
||||
public class LinearLocusShardStrategyUnitTest extends BaseTest {
|
||||
|
||||
private GenomeLocSortedSet mSortedSet = null;
|
||||
private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||
private static final int STARTING_CHROMOSOME = 1;
|
||||
private static final int CHROMOSOME_SIZE = 1000;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetup() {
|
||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
|
||||
int counter = 0;
|
||||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 499);
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdjustSize() {
|
||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
|
||||
strat.adjustNextShardSize(1000);
|
||||
int counter = 0;
|
||||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 999);
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 5);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUnevenSplit() {
|
||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, -1);
|
||||
int counter = 0;
|
||||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
if (counter % 2 == 0) {
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 599);
|
||||
} else {
|
||||
assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 399);
|
||||
}
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 10);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDashMOption() {
|
||||
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, 200);
|
||||
int counter = 0;
|
||||
while(strat.hasNext()) {
|
||||
Shard d = strat.next();
|
||||
assertTrue(d instanceof LocusShard);
|
||||
assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size());
|
||||
assertEquals(199,(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart()));
|
||||
++counter;
|
||||
}
|
||||
assertTrue(counter == 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.fail;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.*;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 8, 2009
|
||||
* Time: 11:31:04 AM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
*/
|
||||
public class ShardStrategyFactoryUnitTest extends BaseTest {
|
||||
|
||||
private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||
private static final int STARTING_CHROMOSOME = 1;
|
||||
private static final int CHROMOSOME_SIZE = 1000;
|
||||
private GenomeLocSortedSet set = null;
|
||||
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
set = new GenomeLocSortedSet();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadNonInterval() {
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100);
|
||||
assertTrue(st instanceof ReadShardStrategy);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadInterval() {
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
set.add(l);
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
|
||||
assertTrue(st instanceof IntervalShardStrategy);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLinearNonInterval() {
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100);
|
||||
assertTrue(st instanceof LinearLocusShardStrategy);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLinearInterval() {
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
set.add(l);
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100,set);
|
||||
assertTrue(st instanceof LinearLocusShardStrategy);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,102 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.io.File;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* use this to inject into SAMDataSource for testing
|
||||
*/
|
||||
public class ArtificialResourcePool extends SAMResourcePool {
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
// the header
|
||||
private SAMFileHeader header;
|
||||
private ArtificialSAMIterator iterator;
|
||||
|
||||
/**
|
||||
* Track the iterator to see whether it's venturing into unmapped reads for the first
|
||||
* time. If so, query straight there. Only works for query iterators.
|
||||
*
|
||||
* TODO: Clean up.
|
||||
*/
|
||||
private boolean intoUnmappedReads = false;
|
||||
|
||||
public ArtificialResourcePool( SAMFileHeader header, ArtificialSAMIterator iterator ) {
|
||||
super( new Reads(Collections.<File>emptyList()) );
|
||||
this.header = header;
|
||||
this.iterator = iterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StingSAMIterator iterator( DataStreamSegment segment ) {
|
||||
if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) {
|
||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
||||
GenomeLoc bounds = mappedSegment.locus;
|
||||
if (!this.queryOverlapping) {
|
||||
queryIterator.queryContained(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop());
|
||||
} else {
|
||||
queryIterator.queryOverlapping(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop());
|
||||
}
|
||||
return queryIterator;
|
||||
}
|
||||
else if (segment instanceof UnmappedStreamSegment) {
|
||||
if( !intoUnmappedReads ) {
|
||||
if( iterator instanceof ArtificialSAMQueryIterator ) {
|
||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||
queryIterator.queryUnmappedReads();
|
||||
}
|
||||
intoUnmappedReads = true;
|
||||
}
|
||||
return new BoundedReadIterator(iterator,((UnmappedStreamSegment)segment).size);
|
||||
}
|
||||
else
|
||||
throw new StingException("Unsupported segment type passed to test");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the merged header
|
||||
*
|
||||
* @return the merged header
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return this.header;
|
||||
}
|
||||
}
|
||||
|
|
@ -50,7 +50,7 @@ import java.util.List;
|
|||
public class SAMBAMDataSourceUnitTest extends BaseTest {
|
||||
|
||||
private List<File> fl;
|
||||
private ReferenceSequenceFile seq;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
|
|
@ -88,8 +88,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
|
|||
Reads reads = new Reads(fl);
|
||||
|
||||
// the sharding strat.
|
||||
SAMDataSource data = new IndexDrivenSAMDataSource(reads);
|
||||
ShardStrategy strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||
SAMDataSource data = new BlockDrivenSAMDataSource(reads);
|
||||
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
|
||||
int count = 0;
|
||||
|
||||
try {
|
||||
|
|
@ -133,8 +133,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
|
|||
Reads reads = new Reads(fl);
|
||||
|
||||
// the sharding strat.
|
||||
SAMDataSource data = new IndexDrivenSAMDataSource(reads);
|
||||
ShardStrategy strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||
SAMDataSource data = new BlockDrivenSAMDataSource(reads);
|
||||
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
|
||||
|
||||
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
|
||||
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();
|
||||
|
|
@ -176,8 +176,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
|
|||
|
||||
count = 0;
|
||||
// the sharding strat.
|
||||
data = new IndexDrivenSAMDataSource(reads);
|
||||
strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||
data = new BlockDrivenSAMDataSource(reads);
|
||||
strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
|
||||
|
||||
logger.debug("Pile two:");
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -1,151 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class SAMByIntervalUnitTest
|
||||
* <p/>
|
||||
* Test that the SAM data source behaves well given intervals
|
||||
*/
|
||||
public class SAMByIntervalUnitTest extends BaseTest {
|
||||
private List<File> fl;
|
||||
ShardStrategy shardStrategy;
|
||||
Reads reads;
|
||||
private int targetReadCount = 14;
|
||||
|
||||
|
||||
// constants we use throughout the tests
|
||||
protected final int READ_COUNT;
|
||||
protected final int ENDING_CHROMO;
|
||||
protected final int STARTING_CHROMO;
|
||||
protected final int UNMAPPED_READ_COUNT;
|
||||
|
||||
public SAMByIntervalUnitTest() {
|
||||
READ_COUNT = 100;
|
||||
ENDING_CHROMO = 10;
|
||||
STARTING_CHROMO = 1;
|
||||
UNMAPPED_READ_COUNT = 1000;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
|
||||
fl = new ArrayList<File>();
|
||||
|
||||
// sequence
|
||||
//seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
||||
//GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
|
||||
// setup the test files
|
||||
fl.add(new File(validationDataLocation + "index_test.bam"));
|
||||
reads = new Reads(fl);
|
||||
}
|
||||
|
||||
|
||||
/** run a test on data over a specific interval */
|
||||
private void testRead( int start, int stop, int readCount ) {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT),
|
||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT));
|
||||
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
int unmappedReadsSeen = 0;
|
||||
int iterations = 0;
|
||||
|
||||
IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads);
|
||||
data.setResourcePool(gen);
|
||||
GenomeLocSortedSet set = new GenomeLocSortedSet();
|
||||
set.add(GenomeLocParser.createGenomeLoc(0, start, stop));
|
||||
ShardStrategy strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, gen.getHeader().getSequenceDictionary(), UNMAPPED_READ_COUNT, set);
|
||||
|
||||
StingSAMIterator iter = data.seek(strat.next());
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
SAMRecord r = iter.next();
|
||||
// uncomment for debugging - System.err.println(r.getAlignmentStart() + " " + r.getAlignmentEnd());
|
||||
count++;
|
||||
}
|
||||
assertEquals(readCount, count);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* test out that we get a single read, given the specific size
|
||||
*/
|
||||
@Test
|
||||
public void testSingleRead() {
|
||||
testRead(1,ArtificialSAMUtils.DEFAULT_READ_LENGTH,50);
|
||||
}
|
||||
|
||||
/**
|
||||
* test out that we get the expected amount for a whole chromosome
|
||||
*/
|
||||
@Test
|
||||
public void testChromosome() {
|
||||
testRead(1, READ_COUNT, READ_COUNT);
|
||||
}
|
||||
|
||||
/**
|
||||
* test out that we get the expected amount for a whole chromosome
|
||||
*/
|
||||
@Test
|
||||
public void testMiddle() {
|
||||
testRead(20, READ_COUNT-20,61);
|
||||
}
|
||||
|
||||
|
||||
private SAMFileHeader createArtificialSamHeader( int startingChr, int endingChr, int readCount, int readSize ) {
|
||||
return ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1,
|
||||
startingChr,
|
||||
readCount + readSize);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,175 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import static junit.framework.Assert.fail;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 15, 2009
|
||||
* Time: 7:12:59 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
*/
|
||||
public class SAMByReadsUnitTest extends BaseTest {
|
||||
|
||||
|
||||
private List<File> fl;
|
||||
ShardStrategy shardStrategy;
|
||||
Reads reads;
|
||||
private int targetReadCount = 14;
|
||||
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
fl = new ArrayList<File>();
|
||||
|
||||
// sequence
|
||||
//seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
||||
//GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
|
||||
// setup the test files
|
||||
fl.add(new File(validationDataLocation + "index_test.bam"));
|
||||
reads = new Reads(fl);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test out that we can shard the file and iterate over every read
|
||||
*/
|
||||
@Test
|
||||
public void testToUnmappedReads() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 1, 50),
|
||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 1, 10));
|
||||
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
try {
|
||||
int unmappedReadsSeen = 0;
|
||||
int iterations = 0;
|
||||
|
||||
IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads);
|
||||
data.setResourcePool(gen);
|
||||
++iterations;
|
||||
StingSAMIterator ret = data.toUnmappedReads(100);
|
||||
// count the reads we've gotten back
|
||||
if (ret == null) {
|
||||
fail("On iteration " + iterations + " we were returned a null pointer, after seeing " + unmappedReadsSeen + " reads out of a 1000");
|
||||
}
|
||||
while (ret.hasNext()) {
|
||||
ret.next();
|
||||
unmappedReadsSeen++;
|
||||
}
|
||||
assertEquals(10, unmappedReadsSeen);
|
||||
} catch (SimpleDataSourceLoadException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test out that we can shard the file and iterate over every read
|
||||
*/
|
||||
@Test
|
||||
public void testShardingOfReadsEvenSplit() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 50),
|
||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10));
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
targetReadCount = 5;
|
||||
try {
|
||||
int readCount = 0;
|
||||
IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads);
|
||||
|
||||
data.setResourcePool(gen);
|
||||
shardStrategy = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
|
||||
while (shardStrategy.hasNext()) {
|
||||
StingSAMIterator ret = data.seek(shardStrategy.next());
|
||||
assertTrue(ret != null);
|
||||
while (ret.hasNext()) {
|
||||
ret.next();
|
||||
readCount++;
|
||||
}
|
||||
ret.close();
|
||||
}
|
||||
assertEquals(20, readCount);
|
||||
} catch (SimpleDataSourceLoadException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test out that we can shard the file and iterate over every read
|
||||
*/
|
||||
@Test
|
||||
public void testShardingOfReadsOddRemainder() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 100),
|
||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10));
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
targetReadCount = 3;
|
||||
try {
|
||||
int readCount = 0;
|
||||
IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads);
|
||||
|
||||
|
||||
data.setResourcePool(gen);
|
||||
shardStrategy = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
|
||||
while (shardStrategy.hasNext()) {
|
||||
|
||||
|
||||
StingSAMIterator ret = data.seek(shardStrategy.next());
|
||||
assertTrue(ret != null);
|
||||
while (ret.hasNext()) {
|
||||
ret.next();
|
||||
readCount++;
|
||||
}
|
||||
ret.close();
|
||||
}
|
||||
|
||||
// assert that we saw 2000 reads
|
||||
assertEquals(20, readCount);
|
||||
|
||||
} catch (SimpleDataSourceLoadException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
||||
}
|
||||
}
|
||||
|
||||
private SAMFileHeader createArtificialSamHeader(int startingChr, int endingChr, int readCount, int readSize) {
|
||||
return ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1,
|
||||
startingChr,
|
||||
readCount + readSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -6,17 +6,9 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SimpleDataSourceLoadException;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.IndexDrivenSAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Assert;
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class DuplicateDetectorIteratorTest
|
||||
* <p/>
|
||||
* test the DuplicateDetectorIterator class.
|
||||
*/
|
||||
public class IntervalOverlapIteratorUnitTest extends BaseTest {
|
||||
private final File bam = new File(validationDataLocation + "index_test.bam");
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private int chromosomeOneReadCount = 885;
|
||||
|
||||
//GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void beforeAll() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
fail("Unexpected Exception" + e.getLocalizedMessage());
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverlappingIntervals() {
|
||||
int countOfReads = 0;
|
||||
int seqLength = seq.getSequenceDictionary().getSequence("chr1").getSequenceLength();
|
||||
GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, 470535);
|
||||
|
||||
// first count the initial pile of reads
|
||||
SAMFileReader reader = new SAMFileReader(bam, true);
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
|
||||
Iterator<SAMRecord> i = reader.queryOverlapping("chr1",1,470535);
|
||||
GenomeLoc newLoc;
|
||||
while (i.hasNext()) {
|
||||
i.next();
|
||||
countOfReads++;
|
||||
}
|
||||
reader.close();
|
||||
while (last.getStart() < seq.getSequenceDictionary().getSequence("chr1").getSequenceLength()) {
|
||||
reader = new SAMFileReader(bam, true);
|
||||
long stop = (last.getStop() >= seqLength) ? seqLength : last.getStop() + 470535;
|
||||
newLoc = GenomeLocParser.createGenomeLoc(last.getContigIndex(),last.getStart()+470535,stop);
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
|
||||
i = reader.queryOverlapping(newLoc.getContig(),(int)newLoc.getStart(),(int)newLoc.getStop());
|
||||
IntervalOverlapIterator iter = new IntervalOverlapIterator(StingSAMIteratorAdapter.adapt(null, i),last,false);
|
||||
while(iter.hasNext()) {
|
||||
countOfReads++;
|
||||
iter.next();
|
||||
}
|
||||
last = newLoc;
|
||||
reader.close();
|
||||
|
||||
}
|
||||
Assert.assertEquals(chromosomeOneReadCount,countOfReads);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaronmckenna
|
||||
* Date: Nov 4, 2009
|
||||
* Time: 11:02:24 PM
|
||||
*/
|
||||
public class PlusOneFixIteratorUnitTest extends BaseTest {
|
||||
private final File bam = new File(validationDataLocation + "index_test.bam");
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private int chromosomeOneReadCount = 885;
|
||||
|
||||
//GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void beforeAll() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
fail("Unexpected Exception" + e.getLocalizedMessage());
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
/**
|
||||
* there's a read starting at chr1:1108664, make our interval go from chr1 to 1108664, apply the iterator,
|
||||
* and we shouldn't see the read.
|
||||
*/
|
||||
@Test
|
||||
public void testReadAtEndOfInterval() {
|
||||
int countOfReads = 0;
|
||||
SAMFileReader reader = new SAMFileReader(bam, true);
|
||||
final int size = 1108663;
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
|
||||
|
||||
GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size);
|
||||
Iterator<SAMRecord> i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", 1, 1108663)));
|
||||
//Iterator<SAMRecord> i = reader.queryOverlapping("chr1", 1, size);
|
||||
while (i.hasNext()) {
|
||||
SAMRecord rec = i.next();
|
||||
countOfReads++;
|
||||
}
|
||||
Assert.assertEquals(2, countOfReads);
|
||||
}
|
||||
|
||||
/**
|
||||
* there are 4296 reads in chr1, make sure we see them all.
|
||||
*
|
||||
* chr1 length = 247249719 (and no reads start at the last position
|
||||
*/
|
||||
@Test
|
||||
public void testAllReads() {
|
||||
int countOfReads = 0;
|
||||
SAMFileReader reader = new SAMFileReader(bam, true);
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
|
||||
final int size = 247249719;
|
||||
GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size);
|
||||
Iterator<SAMRecord> i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", 1, size)));
|
||||
//Iterator<SAMRecord> i = reader.queryOverlapping("chr1", 1, size);
|
||||
while (i.hasNext()) {
|
||||
SAMRecord rec = i.next();
|
||||
countOfReads++;
|
||||
}
|
||||
Assert.assertEquals(885, countOfReads);
|
||||
}
|
||||
|
||||
/**
|
||||
* there are 4296 reads in chr1, make sure we see them all.
|
||||
*
|
||||
* chr1 length = 247249719 (and no reads start at the last position
|
||||
*/
|
||||
@Test
|
||||
public void testAllReadsSharded() {
|
||||
int countOfReads = 0;
|
||||
|
||||
final int size = 247249719;
|
||||
int currentStop = 0;
|
||||
int incr = 100000;
|
||||
GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size);
|
||||
while (currentStop < size) {
|
||||
SAMFileReader reader = new SAMFileReader(bam, true);
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
|
||||
int lastStop = (currentStop > 1) ? currentStop: 1;
|
||||
if (currentStop + incr < size)
|
||||
currentStop += incr;
|
||||
else
|
||||
currentStop = size;
|
||||
Iterator<SAMRecord> i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", lastStop, currentStop)));
|
||||
|
||||
while (i.hasNext()) {
|
||||
SAMRecord rec = i.next();
|
||||
countOfReads++;
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
Assert.assertEquals(885, countOfReads);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
|||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.IndexDrivenSAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -103,54 +103,6 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
|
||||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testMappedReadCount() {
|
||||
|
||||
IndexedFastaSequenceFile ref = null;
|
||||
try {
|
||||
ref = new IndexedFastaSequenceFile(refFile);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
|
||||
SAMDataSource dataSource = new IndexDrivenSAMDataSource(new Reads(bamList));
|
||||
dataSource.viewUnmappedReads(false);
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
|
||||
countReadWalker.initialize();
|
||||
Object accumulator = countReadWalker.reduceInit();
|
||||
|
||||
while (shardStrategy.hasNext()) {
|
||||
Shard shard = shardStrategy.next();
|
||||
|
||||
if (shard == null) {
|
||||
fail("Shard == null");
|
||||
}
|
||||
|
||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null);
|
||||
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
|
||||
dataProvider.close();
|
||||
|
||||
}
|
||||
|
||||
traversalEngine.printOnTraversalDone("reads", accumulator);
|
||||
countReadWalker.onTraversalDone(accumulator);
|
||||
|
||||
if (!(accumulator instanceof Integer)) {
|
||||
fail("Count read walker should return an interger.");
|
||||
}
|
||||
if (((Integer) accumulator) != 9721) {
|
||||
fail("there should be 9721 mapped reads in the index file, there was " + ((Integer) accumulator) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
|
|
@ -163,9 +115,8 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
}
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
|
||||
SAMDataSource dataSource = new IndexDrivenSAMDataSource(new Reads(bamList));
|
||||
dataSource.viewUnmappedReads(true);
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
SAMDataSource dataSource = new BlockDrivenSAMDataSource(new Reads(bamList));
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
|
||||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue