Switch TraverseByLoci over to new sharding system, and cleanup some code in passing read files along
the pathway from command line to traversal engine. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@727 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
57e5f22987
commit
2c4de7b5c5
|
|
@ -1,11 +1,18 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -80,6 +87,9 @@ public class CommandLineGATK extends CommandLineProgram {
|
|||
}
|
||||
loadArgumentsIntoObject(argCollection);
|
||||
loadArgumentsIntoObject(mWalker);
|
||||
|
||||
processArguments(argCollection);
|
||||
|
||||
this.argCollection.analysisName = this.analysisName;
|
||||
try {
|
||||
GATKEngine = new GenomeAnalysisEngine(argCollection, mWalker);
|
||||
|
|
@ -132,4 +142,38 @@ public class CommandLineGATK extends CommandLineProgram {
|
|||
this.argCollection = argCollection;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Preprocess the arguments before submitting them to the GATK engine.
|
||||
* @param argCollection Collection of arguments to preprocess.
|
||||
*/
|
||||
private void processArguments( GATKArgumentCollection argCollection ) {
|
||||
argCollection.samFiles = unpackReads( argCollection.samFiles );
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack the files to be processed, given a list of files. That list of files can
|
||||
* itself contain lists of other files to be read.
|
||||
* @param inputFiles
|
||||
* @return
|
||||
*/
|
||||
private List<File> unpackReads( List<File> inputFiles ) {
|
||||
List<File> unpackedReads = new ArrayList<File>();
|
||||
for( File inputFile: inputFiles ) {
|
||||
if( inputFile.getName().endsWith(".list") ) {
|
||||
try {
|
||||
for( String fileName : new xReadLines(inputFile) )
|
||||
unpackedReads.add( new File(fileName) );
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new StingException("Unable to find file while unpacking reads", ex);
|
||||
}
|
||||
}
|
||||
else
|
||||
unpackedReads.add( inputFile );
|
||||
}
|
||||
return unpackedReads;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,15 +96,15 @@ public class GATKArgumentCollection {
|
|||
|
||||
@Element(required=false)
|
||||
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
||||
public String maximumReadSorts = null;
|
||||
public Integer maximumReadSorts = null;
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
||||
public String downsampleFraction = null;
|
||||
public Double downsampleFraction = null;
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false)
|
||||
public String downsampleCoverage = null;
|
||||
public Integer downsampleCoverage = null;
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
|
|
@ -238,13 +238,16 @@ public class GATKArgumentCollection {
|
|||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.maximumReadSorts.equals(this.maximumReadSorts)) {
|
||||
if ((other.maximumReadSorts == null && this.maximumReadSorts != null) ||
|
||||
(other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts))) {
|
||||
return false;
|
||||
}
|
||||
if (!other.downsampleFraction.equals(this.downsampleFraction)) {
|
||||
if ((other.downsampleFraction == null && this.downsampleFraction != null) ||
|
||||
(other.downsampleFraction != null && !other.downsampleFraction.equals(this.downsampleFraction))) {
|
||||
return false;
|
||||
}
|
||||
if (!other.downsampleCoverage.equals(this.downsampleCoverage)) {
|
||||
if ((other.downsampleCoverage == null && this.downsampleCoverage != null) ||
|
||||
(other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage))) {
|
||||
return false;
|
||||
}
|
||||
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
||||
// branching based on the input data, but this should disapear when all the traversals are switched over
|
||||
if ((my_walker instanceof LocusWalker && argCollection.walkAllLoci && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) ||
|
||||
if ((my_walker instanceof LocusWalker && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) ||
|
||||
my_walker instanceof ReadWalker) {
|
||||
microScheduler = createMicroscheduler(my_walker, rods);
|
||||
} else { // we have an old style traversal, once we're done return
|
||||
|
|
@ -181,13 +181,15 @@ public class GenomeAnalysisEngine {
|
|||
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles));
|
||||
|
||||
// create the MicroScheduler
|
||||
microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
if( argCollection.walkAllLoci )
|
||||
Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal.");
|
||||
microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
}
|
||||
else if (my_walker instanceof ReadWalker) {
|
||||
if (argCollection.referenceFile == null)
|
||||
Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
|
||||
microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
}
|
||||
|
||||
|
|
@ -209,26 +211,14 @@ public class GenomeAnalysisEngine {
|
|||
if (argCollection.intervals != null) {
|
||||
engine.setLocation(setupIntervalRegion());
|
||||
}
|
||||
// hmm...
|
||||
if (argCollection.maximumReadSorts != null) {
|
||||
engine.setSortOnFly(Integer.parseInt(argCollection.maximumReadSorts));
|
||||
}
|
||||
|
||||
if (argCollection.downsampleFraction != null) {
|
||||
engine.setDownsampleByFraction(Double.parseDouble(argCollection.downsampleFraction));
|
||||
}
|
||||
engine.setReadFilters(new Reads(argCollection));
|
||||
|
||||
if (argCollection.downsampleCoverage != null) {
|
||||
engine.setDownsampleByCoverage(Integer.parseInt(argCollection.downsampleCoverage));
|
||||
}
|
||||
|
||||
engine.setSafetyChecking(!argCollection.unsafe);
|
||||
engine.setThreadedIO(argCollection.enabledThreadedIO);
|
||||
engine.setWalkOverAllSites(argCollection.walkAllLoci);
|
||||
engine.initialize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* setup the interval regions, from either the interval file of the genome region string
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 4:06:26 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A data structure containing information about the reads data sources as well as
|
||||
* information about how they should be downsampled, sorted, and filtered.
|
||||
*/
|
||||
public class Reads {
|
||||
private List<File> readsFiles = null;
|
||||
|
||||
private Double downsamplingFraction = null;
|
||||
private Integer downsampleToCoverage = null;
|
||||
private Integer maxOnFlySorts = null;
|
||||
private Boolean beSafe = null;
|
||||
|
||||
/**
|
||||
* Gets a list of the files acting as sources of reads.
|
||||
* @return A list of files storing reads data.
|
||||
*/
|
||||
public List<File> getReadsFiles() {
|
||||
return readsFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the fraction of reads to downsample.
|
||||
* @return Downsample fraction.
|
||||
*/
|
||||
public Double getDownsamplingFraction() {
|
||||
return downsamplingFraction;
|
||||
}
|
||||
|
||||
/**
|
||||
* Downsample each locus to the specified coverage.
|
||||
* @return Coverage to which to downsample.
|
||||
*/
|
||||
public Integer getDownsampleToCoverage() {
|
||||
return downsampleToCoverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the maximum number of supported on-the-fly sorts.
|
||||
* @return Max number of on-the-fly sorts.
|
||||
*/
|
||||
public Integer getMaxOnTheFlySorts() {
|
||||
return maxOnFlySorts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether to 'verify' the reads as we pass through them.
|
||||
* @return Whether to verify the reads.
|
||||
*/
|
||||
public Boolean getSafetyChecking() {
|
||||
return beSafe;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple constructor for unit testing.
|
||||
* @param readsFiles List of reads files to open.
|
||||
*/
|
||||
public Reads( List<File> readsFiles ) {
|
||||
this.readsFiles = readsFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the command-line arguments having to do with reads input
|
||||
* files and store them in an easy-to-work-with package. Constructor
|
||||
* is package protected.
|
||||
* @param arguments GATK parsed command-line arguments.
|
||||
*/
|
||||
Reads( GATKArgumentCollection arguments ) {
|
||||
this.readsFiles = arguments.samFiles;
|
||||
if (arguments.downsampleFraction != null) downsamplingFraction = arguments.downsampleFraction;
|
||||
if (arguments.downsampleCoverage != null) downsampleToCoverage = arguments.downsampleCoverage;
|
||||
if (arguments.maximumReadSorts != null) maxOnFlySorts = arguments.maximumReadSorts;
|
||||
beSafe = !arguments.unsafe;
|
||||
}
|
||||
}
|
||||
|
|
@ -15,8 +15,11 @@ import java.util.Map;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.utils.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.PathUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
|
|
@ -95,10 +98,28 @@ public class WalkerManager {
|
|||
return (Walker) walker.newInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the walker class given a walker name.
|
||||
* @param walkerName Name of the walker.
|
||||
* @return Class representing the walker.
|
||||
*/
|
||||
public Class getWalkerClassByName(String walkerName) {
|
||||
return walkers.get(walkerName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the data source for the provided walker.
|
||||
* @param walker The walker.
|
||||
* @return Which type of data source to traverse over...reads or reference?
|
||||
*/
|
||||
public static DataSource getWalkerDataSource(Walker walker) {
|
||||
Class<? extends Walker> walkerClass = walker.getClass();
|
||||
By byDataSource = walkerClass.getAnnotation(By.class);
|
||||
if( byDataSource == null )
|
||||
throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName());
|
||||
return byDataSource.value();
|
||||
}
|
||||
|
||||
/**
|
||||
* Load classes internal to the classpath from an arbitrary location.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,17 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
/**
|
||||
* User: hanna
|
||||
|
|
@ -30,24 +22,25 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
* A LocusContextQueue over which the user can iterate.
|
||||
*/
|
||||
|
||||
public class IterableLocusContextQueue implements LocusContextQueue, LocusIterator {
|
||||
private Shard shard;
|
||||
private LocusContextIterator loci;
|
||||
|
||||
public class IterableLocusContextQueue extends LocusContextQueue implements LocusIterator {
|
||||
/**
|
||||
* What's the context for the last locus accessed?
|
||||
* @param provider
|
||||
*/
|
||||
private LocusContext nextLocusContext = null;
|
||||
private LocusContext prefetched = null;
|
||||
|
||||
/**
|
||||
* Has this prefetch been consumed? If this flag is set,
|
||||
* the prefetch will skip to the next argument in the system.
|
||||
*/
|
||||
private boolean prefetchConsumed = true;
|
||||
|
||||
/**
|
||||
* Create a new queue of locus contexts.
|
||||
* @param provider
|
||||
*/
|
||||
public IterableLocusContextQueue(ShardDataProvider provider) {
|
||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
||||
this.loci = new LocusContextIteratorByHanger(reads);
|
||||
this.shard = provider.getShard();
|
||||
super( provider );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -55,7 +48,8 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
|||
* @return True if another locus present in this iterator. Otherwise, false.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return loci.hasNext();
|
||||
prefetchLocusContext();
|
||||
return prefetched != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -63,12 +57,36 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
|||
* @return Next element in the queue.
|
||||
*/
|
||||
public GenomeLoc next() {
|
||||
do {
|
||||
nextLocusContext = loci.next();
|
||||
prefetchLocusContext();
|
||||
prefetchConsumed = true;
|
||||
// Signal that the prefetcher needs to grab another entry off the queue.
|
||||
return prefetched.getLocation();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the next locus context within the bounds of a member variable and store
|
||||
* it in the prefetched member variable. When the prefetch is consumed, the 'consumer'
|
||||
* should signal it as such by marking prefetchConsumed = true.
|
||||
*/
|
||||
private void prefetchLocusContext() {
|
||||
if( !prefetchConsumed )
|
||||
return;
|
||||
|
||||
prefetched = null;
|
||||
prefetchConsumed = false;
|
||||
|
||||
// If another locus context bounded by this shard exists, find it.
|
||||
boolean prefetchOutOfBounds = true;
|
||||
while( hasNextLocusContext() && prefetchOutOfBounds ) {
|
||||
prefetched = getNextLocusContext();
|
||||
prefetchOutOfBounds = (prefetched.getLocation().isBefore(shard.getGenomeLoc()) ||
|
||||
prefetched.getLocation().isPast(shard.getGenomeLoc()));
|
||||
}
|
||||
while( nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) );
|
||||
|
||||
return nextLocusContext.getLocation();
|
||||
|
||||
// Can't find a valid prefetch? Set prefetch to null. If prefetched == null and
|
||||
// prefetchConsumed == false, the queue is out of entries.
|
||||
if( prefetchOutOfBounds )
|
||||
prefetched = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -83,9 +101,9 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
|||
* @return
|
||||
*/
|
||||
public LocusContext peek() {
|
||||
if( nextLocusContext == null )
|
||||
if( prefetched == null )
|
||||
throw new NoSuchElementException("No more elements remaining in queue");
|
||||
return nextLocusContext;
|
||||
return prefetched;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -93,10 +111,8 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
|||
* @param seekPoint
|
||||
*/
|
||||
public LocusContextQueue seek( GenomeLoc seekPoint ) {
|
||||
if( nextLocusContext == null || !seekPoint.equals(nextLocusContext.getLocation()) ) {
|
||||
nextLocusContext = null;
|
||||
if( prefetched == null || !seekPoint.equals(prefetched.getLocation()) )
|
||||
throw new IllegalArgumentException("IterableLocusContextQueue doesn't support seeking and iterator is in the wrong position.");
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,17 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 13, 2009
|
||||
|
|
@ -19,17 +29,50 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
* A queue of locus context entries.
|
||||
*/
|
||||
|
||||
public interface LocusContextQueue {
|
||||
public abstract class LocusContextQueue {
|
||||
protected Shard shard;
|
||||
|
||||
private Reads sourceInfo;
|
||||
private LocusContextIterator loci;
|
||||
|
||||
public LocusContextQueue(ShardDataProvider provider) {
|
||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
||||
this.loci = new LocusContextIteratorByHanger(reads);
|
||||
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
||||
this.shard = provider.getShard();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the locus context at the given position.
|
||||
* @return Locus context, or null if no locus context exists at this position.
|
||||
*/
|
||||
LocusContext peek();
|
||||
public abstract LocusContext peek();
|
||||
|
||||
/**
|
||||
* Seek to the given point the queue of locus contexts.
|
||||
* @param target Target base pair to which to seek. Must be a single base pair.
|
||||
* @return an instance of itself for parameter chaining.
|
||||
*/
|
||||
public LocusContextQueue seek(GenomeLoc target);
|
||||
public abstract LocusContextQueue seek(GenomeLoc target);
|
||||
|
||||
/**
|
||||
* Gets the next locus context, applying filtering as necessary.
|
||||
* @return Locus context to work with.
|
||||
*/
|
||||
protected LocusContext getNextLocusContext() {
|
||||
LocusContext next = loci.next();
|
||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||
next.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||
return next;
|
||||
}
|
||||
|
||||
/**
|
||||
* hasNext()-style iterator for base iterator.
|
||||
* @return
|
||||
*/
|
||||
protected boolean hasNextLocusContext() {
|
||||
return loci.hasNext();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.dataSources.providers;
|
|||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -31,10 +32,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
|
|||
* implementation of java.util.Queue interface.
|
||||
*/
|
||||
|
||||
public class SeekableLocusContextQueue implements LocusContextQueue {
|
||||
private Shard shard;
|
||||
private LocusContextIterator loci;
|
||||
|
||||
public class SeekableLocusContextQueue extends LocusContextQueue {
|
||||
/**
|
||||
* Gets the position to which the last seek was requested.
|
||||
*/
|
||||
|
|
@ -53,15 +51,13 @@ public class SeekableLocusContextQueue implements LocusContextQueue {
|
|||
* @param provider
|
||||
*/
|
||||
public SeekableLocusContextQueue(ShardDataProvider provider) {
|
||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
||||
this.loci = new LocusContextIteratorByHanger(reads);
|
||||
this.shard = provider.getShard();
|
||||
super(provider);
|
||||
|
||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||
seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart());
|
||||
|
||||
if( loci.hasNext() )
|
||||
nextLocusContext = loci.next();
|
||||
if( hasNextLocusContext() )
|
||||
nextLocusContext = getNextLocusContext();
|
||||
else
|
||||
nextLocusContext = this.createEmptyLocusContext(seekPoint);
|
||||
}
|
||||
|
|
@ -94,15 +90,15 @@ public class SeekableLocusContextQueue implements LocusContextQueue {
|
|||
seekPoint = (GenomeLoc)target.clone();
|
||||
|
||||
// Search for the next locus context following the target positions.
|
||||
while (nextLocusContext.getLocation().isBefore(target) && loci.hasNext() ) {
|
||||
while (nextLocusContext.getLocation().isBefore(target) && hasNextLocusContext() ) {
|
||||
logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(),
|
||||
target,
|
||||
nextLocusContext.getLocation().compareTo(target)));
|
||||
nextLocusContext = loci.next();
|
||||
nextLocusContext = getNextLocusContext();
|
||||
}
|
||||
|
||||
// Couldn't find a next? Force the nextLocusContext to null.
|
||||
if( nextLocusContext.getLocation().isBefore(target) && !loci.hasNext() )
|
||||
if( nextLocusContext.getLocation().isBefore(target) && !hasNextLocusContext() )
|
||||
nextLocusContext = createEmptyLocusContext( seekPoint );
|
||||
|
||||
return this;
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
|
|
@ -32,6 +35,11 @@ import java.util.List;
|
|||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class SAMDataSource implements SimpleDataSource {
|
||||
/**
|
||||
* Backing support for reads.
|
||||
*/
|
||||
private Reads reads = null;
|
||||
|
||||
/** our SAM data files */
|
||||
private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
||||
|
||||
|
|
@ -65,25 +73,18 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* constructor, given sam files
|
||||
*
|
||||
* @param samFiles the list of sam files
|
||||
* @param reads the list of sam files
|
||||
*/
|
||||
public SAMDataSource(List<?> samFiles) throws SimpleDataSourceLoadException {
|
||||
public SAMDataSource(Reads reads) throws SimpleDataSourceLoadException {
|
||||
this.reads = reads;
|
||||
|
||||
// check the length
|
||||
if (samFiles.size() < 1) {
|
||||
if (reads.getReadsFiles().size() < 1) {
|
||||
throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0");
|
||||
}
|
||||
for (Object fileName : samFiles) {
|
||||
File smFile;
|
||||
if (samFiles.get(0) instanceof String) {
|
||||
smFile = new File((String) samFiles.get(0));
|
||||
} else if (samFiles.get(0) instanceof File) {
|
||||
smFile = (File) fileName;
|
||||
} else {
|
||||
throw new SimpleDataSourceLoadException("SAMDataSource: unknown samFile list type, must be String or File");
|
||||
}
|
||||
|
||||
for (File smFile : reads.getReadsFiles()) {
|
||||
if (!smFile.canRead()) {
|
||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + fileName);
|
||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||
}
|
||||
samFileList.add(smFile);
|
||||
|
||||
|
|
@ -120,7 +121,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @param location the genome location to extract data for
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
||||
public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
||||
|
||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||
SamFileHeaderMerger headerMerger = createHeaderMerger();
|
||||
|
|
@ -132,7 +133,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
|
||||
|
||||
// return the iterator
|
||||
return iter;
|
||||
return StingSAMIteratorAdapter.adapt( reads, iter );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -144,13 +145,26 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @return an iterator for that region
|
||||
*/
|
||||
public StingSAMIterator seek(Shard shard) throws SimpleDataSourceLoadException {
|
||||
StingSAMIterator iterator = null;
|
||||
if (shard.getShardType() == Shard.ShardType.READ) {
|
||||
return seekRead((ReadShard) shard);
|
||||
iterator = seekRead((ReadShard) shard);
|
||||
iterator = TraversalEngine.applyDecoratingIterators(true,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getMaxOnTheFlySorts(),
|
||||
reads.getSafetyChecking());
|
||||
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
return seekLocus(shard.getGenomeLoc());
|
||||
iterator = seekLocus(shard.getGenomeLoc());
|
||||
iterator = TraversalEngine.applyDecoratingIterators(false,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getMaxOnTheFlySorts(),
|
||||
reads.getSafetyChecking());
|
||||
} else {
|
||||
throw new StingException("seek: Unknown shard type");
|
||||
}
|
||||
|
||||
return iterator;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -204,7 +218,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
|
||||
if (bound == null) {
|
||||
shard.signalDone();
|
||||
bound = new BoundedReadIterator(iter, 0);
|
||||
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), 0);
|
||||
}
|
||||
return bound;
|
||||
}
|
||||
|
|
@ -258,7 +272,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
|
||||
// we're good, increment our read cout
|
||||
this.readsTaken += readCount;
|
||||
return new BoundedReadIterator(iter, readCount);
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -276,7 +290,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
if (lastReadPos == null) {
|
||||
lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
|
||||
iter.queryContained(lastReadPos.getContig(), 1, -1);
|
||||
bound = new BoundedReadIterator(iter, readCount);
|
||||
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
|
||||
this.readsTaken = readCount;
|
||||
}
|
||||
// we're not at the beginning, not at the end, so we move forward with our ghastly plan...
|
||||
|
|
@ -324,7 +338,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
SamFileHeaderMerger mg = createHeaderMerger();
|
||||
iter = new MergingSamRecordIterator2(mg);
|
||||
iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
|
||||
return new BoundedReadIterator(iter,readCount);
|
||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter),readCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -349,7 +363,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
|
||||
//return null;
|
||||
}
|
||||
bound = new BoundedReadIterator(iter, readCount);
|
||||
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
|||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -58,7 +59,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Reduce
|
|||
* @param refFile Reference for driving the traversal.
|
||||
* @param nThreadsToUse maximum number of threads to use to do the work
|
||||
*/
|
||||
protected HierarchicalMicroScheduler( Walker walker, List<File> reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||
protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||
super( walker, reads, refFile, rods );
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
if( !(traversalEngine instanceof TraverseLociByReference) )
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -20,7 +21,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
* @param reads Reads file(s) to process.
|
||||
* @param refFile Reference for driving the traversal.
|
||||
*/
|
||||
protected LinearMicroScheduler( Walker walker, List<File> reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||
protected LinearMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||
super(walker, reads, refFile, rods);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
|
@ -51,7 +52,7 @@ public abstract class MicroScheduler {
|
|||
* @param nThreadsToUse Number of threads to utilize.
|
||||
* @return The best-fit microscheduler.
|
||||
*/
|
||||
public static MicroScheduler create( Walker walker, List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||
public static MicroScheduler create( Walker walker, Reads reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||
if( walker instanceof TreeReducible && nThreadsToUse > 1 ) {
|
||||
logger.info("Creating hierarchical microscheduler");
|
||||
return new HierarchicalMicroScheduler( walker, reads, ref, rods, nThreadsToUse );
|
||||
|
|
@ -67,11 +68,11 @@ public abstract class MicroScheduler {
|
|||
* @param reads The reads.
|
||||
* @param refFile File pointer to the reference.
|
||||
*/
|
||||
protected MicroScheduler( Walker walker, List<File> reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||
protected MicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||
if (walker instanceof ReadWalker) {
|
||||
traversalEngine = new TraverseReads(reads, refFile, rods);
|
||||
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
||||
} else {
|
||||
traversalEngine = new TraverseLociByReference(reads, refFile, rods);
|
||||
traversalEngine = new TraverseLociByReference(reads.getReadsFiles(), refFile, rods);
|
||||
}
|
||||
|
||||
this.reads = getReadsDataSource( reads );
|
||||
|
|
@ -132,16 +133,8 @@ public abstract class MicroScheduler {
|
|||
* Gets a data source for the given set of reads.
|
||||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource getReadsDataSource( List<File> reads ) {
|
||||
List<File> unpackedReads = null;
|
||||
try {
|
||||
unpackedReads = TraversalEngine.unpackReads(reads);
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new StingException( "Cannot unpack list of reads files", ex );
|
||||
}
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource( unpackedReads );
|
||||
private SAMDataSource getReadsDataSource( Reads reads ) {
|
||||
SAMDataSource dataSource = new SAMDataSource( reads );
|
||||
|
||||
// Side effect: initialize the traversal engine with reads data.
|
||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import net.sf.samtools.SAMRecord;
|
|||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -68,6 +70,14 @@ public class BoundedReadIterator implements StingSAMIterator {
|
|||
this.doNotUseThatUnmappedReadPile = useThem;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves information about reads sources.
|
||||
* @return Info about the sources of reads.
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
return iterator.getSourceInfo();
|
||||
}
|
||||
|
||||
|
||||
public SAMFileHeader getHeader() {
|
||||
// todo: this is bad, we need an iterface out there for samrecords that supports getting the header,
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import net.sf.samtools.SAMRecord;
|
|||
import java.util.Iterator;
|
||||
import java.util.Random;
|
||||
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
|
||||
public class DownsampleIterator implements StingSAMIterator {
|
||||
|
||||
|
|
@ -20,6 +22,15 @@ public class DownsampleIterator implements StingSAMIterator {
|
|||
next = getNextRecord();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves information about reads sources.
|
||||
* @return Info about the sources of reads.
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
return it.getSourceInfo();
|
||||
}
|
||||
|
||||
|
||||
public boolean hasNext() {
|
||||
return next != null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,10 @@ import edu.mit.broad.picard.sam.ReservedTagConstants;
|
|||
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
||||
import edu.mit.broad.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Comparator;
|
||||
|
|
@ -28,7 +31,7 @@ import java.util.PriorityQueue;
|
|||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator2 implements StingSAMIterator {
|
||||
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
||||
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
||||
protected final SamFileHeaderMerger samHeaderMerger;
|
||||
protected final SAMFileHeader.SortOrder sortOrder;
|
||||
|
|
@ -272,6 +275,7 @@ public class MergingSamRecordIterator2 implements StingSAMIterator {
|
|||
|
||||
// Should replace picard class with the same name
|
||||
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
|
||||
private Reads sourceInfo;
|
||||
private final Comparator<SAMRecord> comparator;
|
||||
private final SAMFileReader reader;
|
||||
|
||||
|
|
@ -295,6 +299,12 @@ class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements
|
|||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
if( sourceInfo == null )
|
||||
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
/** Returns the reader from which this iterator was constructed. */
|
||||
public SAMFileReader getReader() {
|
||||
return reader;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
@ -11,11 +13,11 @@ import java.util.Iterator;
|
|||
// TODO: Deprecate?
|
||||
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
|
||||
public class SortSamIterator implements StingSAMIterator {
|
||||
private Reads sourceInfo;
|
||||
private Iterator<ComparableSAMRecord> it;
|
||||
|
||||
Iterator<ComparableSAMRecord> it;
|
||||
|
||||
public SortSamIterator(Iterator<SAMRecord> unsortedIter, int maxSorts) {
|
||||
|
||||
public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) {
|
||||
sourceInfo = unsortedIter.getSourceInfo();
|
||||
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||
while (unsortedIter.hasNext()) {
|
||||
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||
|
|
@ -27,6 +29,16 @@ public class SortSamIterator implements StingSAMIterator {
|
|||
it = list.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves information about reads sources.
|
||||
* @return Info about the sources of reads.
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
if( sourceInfo == null )
|
||||
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return it.hasNext(); }
|
||||
public SAMRecord next() { return it.next().getRecord(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -28,4 +29,10 @@ import net.sf.samtools.util.CloseableIterator;
|
|||
* This is the standard interface for all iterators in the Sting package that iterate over SAMRecords
|
||||
*/
|
||||
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
||||
/**
|
||||
* Gets source information for the reads. Contains information about the original reads
|
||||
* files, plus information about downsampling, etc.
|
||||
* @return
|
||||
*/
|
||||
public Reads getSourceInfo();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ import net.sf.samtools.util.CloseableIterator;
|
|||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
|
|
@ -31,14 +34,14 @@ import java.util.Iterator;
|
|||
* <p/>
|
||||
* This class adapts other SAMRecord iterators to the StingSAMIterator
|
||||
*/
|
||||
public class StingSAMIteratorAdapter {
|
||||
public class StingSAMIteratorAdapter {
|
||||
|
||||
public static StingSAMIterator adapt(Iterator<SAMRecord> iter) {
|
||||
return new PrivateStringSAMIterator(iter);
|
||||
public static StingSAMIterator adapt(Reads sourceInfo, Iterator<SAMRecord> iter) {
|
||||
return new PrivateStringSAMIterator(sourceInfo, iter);
|
||||
}
|
||||
|
||||
public static StingSAMIterator adapt(CloseableIterator<SAMRecord> iter) {
|
||||
return new PrivateStringSAMCloseableIterator(iter);
|
||||
public static StingSAMIterator adapt(Reads sourceInfo, CloseableIterator<SAMRecord> iter) {
|
||||
return new PrivateStringSAMCloseableIterator(sourceInfo, iter);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -49,12 +52,20 @@ public class StingSAMIteratorAdapter {
|
|||
* methods that implement the iterable<> interface and the close() method from CloseableIterator
|
||||
*/
|
||||
class PrivateStringSAMIterator implements StingSAMIterator {
|
||||
private Reads sourceInfo = null;
|
||||
private Iterator<SAMRecord> iter = null;
|
||||
|
||||
PrivateStringSAMIterator(Iterator<SAMRecord> iter) {
|
||||
PrivateStringSAMIterator(Reads sourceInfo, Iterator<SAMRecord> iter) {
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.iter = iter;
|
||||
}
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
if( sourceInfo == null )
|
||||
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
// do nothing, we can't close the iterator anyway.
|
||||
}
|
||||
|
|
@ -82,12 +93,20 @@ class PrivateStringSAMIterator implements StingSAMIterator {
|
|||
* methods that implement the iterable<> interface.
|
||||
*/
|
||||
class PrivateStringSAMCloseableIterator implements StingSAMIterator {
|
||||
private Reads sourceInfo = null;
|
||||
private CloseableIterator<SAMRecord> iter = null;
|
||||
|
||||
PrivateStringSAMCloseableIterator(CloseableIterator<SAMRecord> iter) {
|
||||
PrivateStringSAMCloseableIterator(Reads sourceInfo, CloseableIterator<SAMRecord> iter) {
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.iter = iter;
|
||||
}
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
if( sourceInfo == null )
|
||||
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
iter.close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -23,6 +24,15 @@ public class VerifyingSamIterator implements StingSAMIterator {
|
|||
this.it = it;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves information about reads sources.
|
||||
* @return Info about the sources of reads.
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
return it.getSourceInfo();
|
||||
}
|
||||
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public SAMRecord next() {
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
|
||||
|
|
@ -131,11 +132,11 @@ public abstract class TraversalEngine {
|
|||
this.walkOverAllSites = walkOverAllSites;
|
||||
}
|
||||
|
||||
public void setDebugging(final boolean d) {
|
||||
private void setDebugging(final boolean d) {
|
||||
DEBUGGING = d;
|
||||
}
|
||||
|
||||
public void setSafetyChecking(final boolean beSafeP) {
|
||||
private void setSafetyChecking(final boolean beSafeP) {
|
||||
if (!beSafeP)
|
||||
logger.warn("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...");
|
||||
this.beSafeP = beSafeP;
|
||||
|
|
@ -147,26 +148,39 @@ public abstract class TraversalEngine {
|
|||
this.FILTER_UNSORTED_READS = filterUnsorted;
|
||||
}
|
||||
|
||||
public void setSortOnFly(final int maxReadsToSort) {
|
||||
private void setSortOnFly(final int maxReadsToSort) {
|
||||
logger.info("Sorting read file on the fly: max reads allowed is " + maxReadsToSort);
|
||||
SORT_ON_FLY = true;
|
||||
maxOnFlySorts = maxReadsToSort;
|
||||
}
|
||||
|
||||
public void setSortOnFly() { setSortOnFly(100000); }
|
||||
private void setSortOnFly() { setSortOnFly(100000); }
|
||||
|
||||
public void setDownsampleByFraction(final double fraction) {
|
||||
private void setDownsampleByFraction(final double fraction) {
|
||||
logger.info("Downsampling to approximately " + (fraction * 100.0) + "% of filtered reads");
|
||||
DOWNSAMPLE_BY_FRACTION = true;
|
||||
downsamplingFraction = fraction;
|
||||
}
|
||||
|
||||
public void setDownsampleByCoverage(final int coverage) {
|
||||
private void setDownsampleByCoverage(final int coverage) {
|
||||
logger.info("Downsampling to coverage " + coverage);
|
||||
DOWNSAMPLE_BY_COVERAGE = true;
|
||||
downsamplingCoverage = coverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets up read filtering performed by the traversal engine.
|
||||
* @param reads Read filters to instantiate as the traversal engine walks over the data.
|
||||
* @deprecated Should only be used by old-style traversals.
|
||||
*/
|
||||
@Deprecated
|
||||
public void setReadFilters( Reads reads) {
|
||||
if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction());
|
||||
if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage());
|
||||
if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts());
|
||||
if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// functions for dealing locations (areas of the genome we're traversing over)
|
||||
|
|
@ -318,36 +332,9 @@ public abstract class TraversalEngine {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack the files to be processed, given a list of files. That list of files can
|
||||
* itself contain lists of other files to be read.
|
||||
* @param inputFiles
|
||||
* @return
|
||||
*/
|
||||
public static List<File> unpackReads( List<File> inputFiles ) throws FileNotFoundException {
|
||||
List<File> unpackedReads = new ArrayList<File>();
|
||||
for( File inputFile: inputFiles ) {
|
||||
if( inputFile.getName().endsWith(".list") ) {
|
||||
for( String fileName : new xReadLines(inputFile) )
|
||||
unpackedReads.add( new File(fileName) );
|
||||
}
|
||||
else
|
||||
unpackedReads.add( inputFile );
|
||||
}
|
||||
return unpackedReads;
|
||||
}
|
||||
|
||||
protected Iterator<SAMRecord> initializeReads() {
|
||||
List<File> allReadsFiles = null;
|
||||
try {
|
||||
allReadsFiles = unpackReads( readsFiles );
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new RuntimeException("Unable to unpack reads", ex );
|
||||
}
|
||||
|
||||
if( allReadsFiles.size() == 1 )
|
||||
samReader = initializeSAMFile(allReadsFiles.get(0));
|
||||
if( readsFiles.size() == 1 )
|
||||
samReader = initializeSAMFile(readsFiles.get(0));
|
||||
else
|
||||
samReader = null;
|
||||
return wrapReadsIterator(getReadsIterator(samReader), true);
|
||||
|
|
@ -358,16 +345,9 @@ public abstract class TraversalEngine {
|
|||
if ( samReader == null && readsFiles.size() > 0 ) {
|
||||
SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
||||
|
||||
List<File> allReadsFiles = null;
|
||||
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||
try {
|
||||
allReadsFiles = unpackReads( readsFiles );
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new RuntimeException("Unable to unpack reads", ex);
|
||||
}
|
||||
|
||||
for ( File readsFile: allReadsFiles ) {
|
||||
for ( File readsFile: readsFiles ) {
|
||||
SAMFileReader reader = initializeSAMFile(readsFile);
|
||||
readers.add(reader);
|
||||
}
|
||||
|
|
@ -382,28 +362,52 @@ public abstract class TraversalEngine {
|
|||
|
||||
@Deprecated
|
||||
protected StingSAMIterator wrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
|
||||
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(rawIterator);
|
||||
wrappedIterator = applyDecoratingIterators(enableVerification, wrappedIterator);
|
||||
|
||||
// Reads sourceInfo is gone by this point in the traversal engine. Stub in a null and rely on the iterator to
|
||||
// throw an exception if reads info isn't present.
|
||||
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator);
|
||||
wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator);
|
||||
|
||||
if (THREADED_IO) {
|
||||
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
|
||||
}
|
||||
|
||||
return wrappedIterator;
|
||||
}
|
||||
|
||||
protected StingSAMIterator applyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator) {
|
||||
/**
|
||||
* Repackage instance variables and call static method.
|
||||
* TODO: This method's days are numbered.
|
||||
* @param enableVerification
|
||||
* @param wrappedIterator
|
||||
* @return
|
||||
*/
|
||||
protected StingSAMIterator applyDecoratingIterators( final boolean enableVerification, final StingSAMIterator wrappedIterator ) {
|
||||
return applyDecoratingIterators(enableVerification,
|
||||
wrappedIterator,
|
||||
DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null,
|
||||
SORT_ON_FLY ? maxOnFlySorts : null,
|
||||
beSafeP);
|
||||
}
|
||||
|
||||
/**
|
||||
* WARNING: In TraversalEngine for backward compatibility ONLY. Reads are not used as the data source, only as parameters
|
||||
* for validation.
|
||||
*/
|
||||
public static StingSAMIterator applyDecoratingIterators(boolean enableVerification,
|
||||
StingSAMIterator wrappedIterator,
|
||||
Double downsamplingFraction,
|
||||
Integer maxOnFlySorts,
|
||||
Boolean beSafeP) {
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
// as there is no reason to sort something that we will end of throwing away
|
||||
if (DOWNSAMPLE_BY_FRACTION)
|
||||
if (downsamplingFraction != null)
|
||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||
|
||||
if (SORT_ON_FLY)
|
||||
if (maxOnFlySorts != null)
|
||||
wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts);
|
||||
|
||||
if (beSafeP && enableVerification)
|
||||
if (beSafeP != null && beSafeP && enableVerification)
|
||||
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
|
||||
return wrappedIterator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,11 +2,15 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -33,7 +37,6 @@ public class TraverseLociByReference extends TraversalEngine {
|
|||
*/
|
||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
|
||||
public TraverseLociByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
super( reads, ref, rods );
|
||||
}
|
||||
|
|
@ -57,8 +60,23 @@ public class TraverseLociByReference extends TraversalEngine {
|
|||
|
||||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||
|
||||
LocusIterator locusIterator = new ReferenceLocusIterator( dataProvider );
|
||||
SeekableLocusContextQueue locusContextQueue = new SeekableLocusContextQueue( dataProvider );
|
||||
LocusIterator locusIterator = null;
|
||||
LocusContextQueue locusContextQueue = null;
|
||||
|
||||
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
switch( dataSource ) {
|
||||
case REFERENCE:
|
||||
locusIterator = new ReferenceLocusIterator( dataProvider );
|
||||
locusContextQueue = new SeekableLocusContextQueue( dataProvider );
|
||||
break;
|
||||
case READS:
|
||||
IterableLocusContextQueue iterableQueue = new IterableLocusContextQueue( dataProvider );
|
||||
locusIterator = iterableQueue;
|
||||
locusContextQueue = iterableQueue;
|
||||
break;
|
||||
default:
|
||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
while( locusIterator.hasNext() ) {
|
||||
|
|
@ -72,9 +90,6 @@ public class TraverseLociByReference extends TraversalEngine {
|
|||
LocusContext locus = locusContextQueue.seek( site ).peek();
|
||||
char refBase = dataProvider.getReferenceBase( site );
|
||||
|
||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||
locus.downsampleToCoverage(downsamplingCoverage);
|
||||
|
||||
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
||||
if (keepMeP) {
|
||||
M x = locusWalker.map(tracker, refBase, locus);
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
|||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -88,11 +87,8 @@ public class TraverseReads extends TraversalEngine {
|
|||
|
||||
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
||||
|
||||
// we allow a bunch of wrapping iterators for down sampling, threadingIO, etc.
|
||||
StingSAMIterator it = applyDecoratingIterators(true, dataProvider.getReadIterator());
|
||||
|
||||
// while we still have more reads
|
||||
for (SAMRecord read : it) {
|
||||
for (SAMRecord read : dataProvider.getReadIterator()) {
|
||||
|
||||
// our locus context
|
||||
LocusContext locus = null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import java.lang.annotation.Documented;
|
||||
import java.lang.annotation.Inherited;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.lang.annotation.ElementType;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface By {
|
||||
DataSource value();
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 2:12:33 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allow user to choose between a number of different data sources.
|
||||
*/
|
||||
public enum DataSource {
|
||||
READS,
|
||||
REFERENCE
|
||||
}
|
||||
|
|
@ -1,11 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mdepristo
|
||||
|
|
@ -13,6 +10,7 @@ import java.util.List;
|
|||
* Time: 2:52:28 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.READS)
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* Time: 11:23:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
public class PrintLocusContextWalker extends LocusWalker<LocusContext, Integer> implements TreeReducible<Integer> {
|
||||
public LocusContext map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
out.printf( "In map: ref = %c, loc = %s, reads = %s%n", ref,
|
||||
|
|
|
|||
|
|
@ -82,9 +82,9 @@ public class GATKArgumentCollectionTest extends BaseTest {
|
|||
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
|
||||
collect.enabledThreadedIO = true;
|
||||
collect.unsafe = false;
|
||||
collect.maximumReadSorts = "maximumReadSorts".toLowerCase();
|
||||
collect.downsampleFraction = "downsampleFraction".toLowerCase();
|
||||
collect.downsampleCoverage = "downsampleCoverage".toLowerCase();
|
||||
collect.maximumReadSorts = null;
|
||||
collect.downsampleFraction = null;
|
||||
collect.downsampleCoverage = null;
|
||||
collect.intervals = "intervals".toLowerCase();
|
||||
collect.walkAllLoci = true;
|
||||
collect.disableThreading = false;
|
||||
|
|
|
|||
|
|
@ -84,7 +84,8 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate {
|
|||
if(new GenomeLoc(read).containsP(locusContext.getLocation()))
|
||||
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Assert.assertFalse("Iterator is not bounded at boundaries of shard", iterableQueue.hasNext());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,10 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -294,6 +296,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
|||
backingIterator = backingList.iterator();
|
||||
}
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
// There are no sources for these reads.
|
||||
return new Reads(new ArrayList<File>());
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return backingIterator.hasNext();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.junit.After;
|
||||
|
|
@ -45,7 +47,7 @@ import java.util.List;
|
|||
*/
|
||||
public class SAMBAMDataSourceTest extends BaseTest {
|
||||
|
||||
private List<String> fl;
|
||||
private List<File> fl;
|
||||
private FastaSequenceFile2 seq;
|
||||
|
||||
/**
|
||||
|
|
@ -55,7 +57,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
fl = new ArrayList<String>();
|
||||
fl = new ArrayList<File>();
|
||||
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
|
|
@ -83,18 +85,18 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
int count = 0;
|
||||
|
||||
// setup the data
|
||||
fl.add(oneKGLocation + "/pilot3/sams/NA12892.bam");
|
||||
|
||||
fl.add(new File(oneKGLocation + "/pilot3/sams/NA12892.bam"));
|
||||
Reads reads = new Reads(fl);
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(fl);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
for (Shard sh : strat) {
|
||||
int readCount = 0;
|
||||
count++;
|
||||
|
||||
logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig());
|
||||
logger.debug("count = " + count);
|
||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh);
|
||||
StingSAMIterator datum = data.seek(sh);
|
||||
|
||||
// for the first couple of shards make sure we can see the reads
|
||||
if (count < 5) {
|
||||
|
|
@ -126,7 +128,8 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
|
||||
|
||||
// setup the test files
|
||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam");
|
||||
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"));
|
||||
Reads reads = new Reads(fl);
|
||||
|
||||
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
|
||||
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();
|
||||
|
|
@ -136,7 +139,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
int count = 0;
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(fl);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
for (Shard sh : strat) {
|
||||
int readCount = 0;
|
||||
count++;
|
||||
|
|
@ -144,7 +147,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
break;
|
||||
}
|
||||
|
||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh);
|
||||
StingSAMIterator datum = data.seek(sh);
|
||||
|
||||
for (SAMRecord r : datum) {
|
||||
readCount++;
|
||||
|
|
@ -163,15 +166,17 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
|
||||
// setup the data and the counter before our second run
|
||||
fl.clear();
|
||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-01A-01W.aligned.duplicates_marked.bam");
|
||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-10B-01W.aligned.duplicates_marked.bam");
|
||||
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-01A-01W.aligned.duplicates_marked.bam"));
|
||||
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-10B-01W.aligned.duplicates_marked.bam"));
|
||||
reads = new Reads(fl);
|
||||
|
||||
count = 0;
|
||||
// the sharding strat.
|
||||
strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||
|
||||
logger.debug("Pile two:");
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(fl);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
for (Shard sh : strat) {
|
||||
int readCount = 0;
|
||||
count++;
|
||||
|
|
@ -181,7 +186,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
break;
|
||||
}
|
||||
|
||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh);
|
||||
StingSAMIterator datum = data.seek(sh);
|
||||
|
||||
for (SAMRecord r : datum) {
|
||||
readCount++;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
|
@ -46,7 +47,7 @@ import java.util.List;
|
|||
public class SAMByReadsTest extends BaseTest {
|
||||
|
||||
private FastaSequenceFile2 seq;
|
||||
private List<String> fl;
|
||||
private List<File> fl;
|
||||
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
|
|
@ -55,7 +56,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
fl = new ArrayList<String>();
|
||||
fl = new ArrayList<File>();
|
||||
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
||||
|
|
@ -69,13 +70,15 @@ public class SAMByReadsTest extends BaseTest {
|
|||
logger.warn("Executing testTotalReadCount");
|
||||
|
||||
// setup the test files
|
||||
fl.add("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam");
|
||||
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
|
||||
Reads reads = new Reads(fl);
|
||||
|
||||
final int targetReadCount = 5000;
|
||||
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatterByReadCount(seq.getSequenceDictionary(),targetReadCount);
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(fl);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
|
||||
// check the total read count
|
||||
final int totalReads = 10000;
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
|||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
|
@ -48,7 +49,7 @@ import java.util.List;
|
|||
public class BoundedReadIteratorTest extends BaseTest {
|
||||
|
||||
/** the file list and the fasta sequence */
|
||||
private List<String> fl;
|
||||
private List<File> fl;
|
||||
private FastaSequenceFile2 seq;
|
||||
|
||||
/**
|
||||
|
|
@ -58,7 +59,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
|||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
fl = new ArrayList<String>();
|
||||
fl = new ArrayList<File>();
|
||||
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
|
|
@ -76,14 +77,15 @@ public class BoundedReadIteratorTest extends BaseTest {
|
|||
|
||||
|
||||
// setup the test files
|
||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam");
|
||||
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"));
|
||||
Reads reads = new Reads(fl);
|
||||
|
||||
// our target read
|
||||
final long boundedReadCount = 100;
|
||||
long shardReadCount = 0;
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(fl);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
|
||||
// make sure we have a shard
|
||||
if (!strat.hasNext()) {
|
||||
|
|
@ -92,8 +94,8 @@ public class BoundedReadIteratorTest extends BaseTest {
|
|||
Shard sd = strat.next();
|
||||
|
||||
|
||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sd);
|
||||
MergingSamRecordIterator2 datum2 = (MergingSamRecordIterator2)data.seek(sd);
|
||||
StingSAMIterator datum = data.seek(sd);
|
||||
StingSAMIterator datum2 = data.seek(sd);
|
||||
|
||||
// check the reads in the shard
|
||||
for (SAMRecord r : datum) {
|
||||
|
|
@ -102,7 +104,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
|||
}
|
||||
|
||||
// create the bounded iterator
|
||||
BoundedReadIterator iter = new BoundedReadIterator(datum2, boundedReadCount);
|
||||
BoundedReadIterator iter = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,datum2), boundedReadCount);
|
||||
|
||||
// now see how many reads are in the bounded iterator
|
||||
int readCount = 0;
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest {
|
|||
final int COUNT = 100;
|
||||
MyTestIterator it = new MyTestIterator();
|
||||
|
||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
|
||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
|
||||
int countCheck = 0;
|
||||
while (samIt.hasNext()) {
|
||||
samIt.next();
|
||||
|
|
@ -116,7 +116,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest {
|
|||
|
||||
MyTestCloseableIterator it = new MyTestCloseableIterator();
|
||||
|
||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
|
||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
|
||||
|
||||
int countCheck = 0;
|
||||
while (samIt.hasNext()) {
|
||||
|
|
@ -133,7 +133,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest {
|
|||
|
||||
MyTestCloseableIterator it = new MyTestCloseableIterator();
|
||||
|
||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
|
||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
|
||||
|
||||
|
||||
int countCheck = 0;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
|
@ -121,15 +122,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
|
||||
List<File> unpackedReads = null;
|
||||
try {
|
||||
unpackedReads = TraversalEngine.unpackReads(bamList);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(unpackedReads);
|
||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||
dataSource.viewUnmappedReads(false);
|
||||
|
||||
countReadWalker.initialize();
|
||||
|
|
@ -176,15 +169,8 @@ public class TraverseReadsTest extends BaseTest {
|
|||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
List<File> unpackedReads = null;
|
||||
try {
|
||||
unpackedReads = TraversalEngine.unpackReads(bamList);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(unpackedReads);
|
||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||
dataSource.viewUnmappedReads(true);
|
||||
|
||||
countReadWalker.initialize();
|
||||
|
|
|
|||
Loading…
Reference in New Issue