diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 1cd75e5f3..e6ceb15b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -1,11 +1,18 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.xReadLines; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; +import java.io.FileNotFoundException; +import java.io.File; +import java.util.List; +import java.util.ArrayList; + /** * * User: aaron @@ -80,6 +87,9 @@ public class CommandLineGATK extends CommandLineProgram { } loadArgumentsIntoObject(argCollection); loadArgumentsIntoObject(mWalker); + + processArguments(argCollection); + this.argCollection.analysisName = this.analysisName; try { GATKEngine = new GenomeAnalysisEngine(argCollection, mWalker); @@ -132,4 +142,38 @@ public class CommandLineGATK extends CommandLineProgram { this.argCollection = argCollection; } + + /** + * Preprocess the arguments before submitting them to the GATK engine. + * @param argCollection Collection of arguments to preprocess. + */ + private void processArguments( GATKArgumentCollection argCollection ) { + argCollection.samFiles = unpackReads( argCollection.samFiles ); + } + + /** + * Unpack the files to be processed, given a list of files. That list of files can + * itself contain lists of other files to be read. + * @param inputFiles + * @return + */ + private List unpackReads( List inputFiles ) { + List unpackedReads = new ArrayList(); + for( File inputFile: inputFiles ) { + if( inputFile.getName().endsWith(".list") ) { + try { + for( String fileName : new xReadLines(inputFile) ) + unpackedReads.add( new File(fileName) ); + } + catch( FileNotFoundException ex ) { + throw new StingException("Unable to find file while unpacking reads", ex); + } + } + else + unpackedReads.add( inputFile ); + } + return unpackedReads; + } + + } diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index e0a8164e8..aa0fd53cb 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -96,15 +96,15 @@ public class GATKArgumentCollection { @Element(required=false) @Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false) - public String maximumReadSorts = null; + public Integer maximumReadSorts = null; @Element(required=false) @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false) - public String downsampleFraction = null; + public Double downsampleFraction = null; @Element(required=false) @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false) - public String downsampleCoverage = null; + public Integer downsampleCoverage = null; @Element(required=false) @Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) @@ -238,13 +238,16 @@ public class GATKArgumentCollection { if (!other.unsafe.equals(this.unsafe)) { return false; } - if (!other.maximumReadSorts.equals(this.maximumReadSorts)) { + if ((other.maximumReadSorts == null && this.maximumReadSorts != null) || + (other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts))) { return false; } - if (!other.downsampleFraction.equals(this.downsampleFraction)) { + if ((other.downsampleFraction == null && this.downsampleFraction != null) || + (other.downsampleFraction != null && !other.downsampleFraction.equals(this.downsampleFraction))) { return false; } - if (!other.downsampleCoverage.equals(this.downsampleCoverage)) { + if ((other.downsampleCoverage == null && this.downsampleCoverage != null) || + (other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage))) { return false; } if (!other.walkAllLoci.equals(this.walkAllLoci)) { diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index b710ac252..1e8944f81 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -84,7 +84,7 @@ public class GenomeAnalysisEngine { // if we're a read or a locus walker, we use the new system. Right now we have complicated // branching based on the input data, but this should disapear when all the traversals are switched over - if ((my_walker instanceof LocusWalker && argCollection.walkAllLoci && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) || + if ((my_walker instanceof LocusWalker && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) || my_walker instanceof ReadWalker) { microScheduler = createMicroscheduler(my_walker, rods); } else { // we have an old style traversal, once we're done return @@ -181,13 +181,15 @@ public class GenomeAnalysisEngine { Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles)); // create the MicroScheduler - microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads); + if( argCollection.walkAllLoci ) + Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal."); + microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); engine = microScheduler.getTraversalEngine(); } else if (my_walker instanceof ReadWalker) { if (argCollection.referenceFile == null) Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given")); - microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads); + microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); engine = microScheduler.getTraversalEngine(); } @@ -209,26 +211,14 @@ public class GenomeAnalysisEngine { if (argCollection.intervals != null) { engine.setLocation(setupIntervalRegion()); } - // hmm... - if (argCollection.maximumReadSorts != null) { - engine.setSortOnFly(Integer.parseInt(argCollection.maximumReadSorts)); - } - if (argCollection.downsampleFraction != null) { - engine.setDownsampleByFraction(Double.parseDouble(argCollection.downsampleFraction)); - } + engine.setReadFilters(new Reads(argCollection)); - if (argCollection.downsampleCoverage != null) { - engine.setDownsampleByCoverage(Integer.parseInt(argCollection.downsampleCoverage)); - } - - engine.setSafetyChecking(!argCollection.unsafe); engine.setThreadedIO(argCollection.enabledThreadedIO); engine.setWalkOverAllSites(argCollection.walkAllLoci); engine.initialize(); } - /** * setup the interval regions, from either the interval file of the genome region string * diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java new file mode 100755 index 000000000..bca047b3f --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -0,0 +1,95 @@ +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; +import org.broadinstitute.sting.utils.StingException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.List; +/** + * User: hanna + * Date: May 14, 2009 + * Time: 4:06:26 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * A data structure containing information about the reads data sources as well as + * information about how they should be downsampled, sorted, and filtered. + */ +public class Reads { + private List readsFiles = null; + + private Double downsamplingFraction = null; + private Integer downsampleToCoverage = null; + private Integer maxOnFlySorts = null; + private Boolean beSafe = null; + + /** + * Gets a list of the files acting as sources of reads. + * @return A list of files storing reads data. + */ + public List getReadsFiles() { + return readsFiles; + } + + /** + * Get the fraction of reads to downsample. + * @return Downsample fraction. + */ + public Double getDownsamplingFraction() { + return downsamplingFraction; + } + + /** + * Downsample each locus to the specified coverage. + * @return Coverage to which to downsample. + */ + public Integer getDownsampleToCoverage() { + return downsampleToCoverage; + } + + /** + * Get the maximum number of supported on-the-fly sorts. + * @return Max number of on-the-fly sorts. + */ + public Integer getMaxOnTheFlySorts() { + return maxOnFlySorts; + } + + /** + * Return whether to 'verify' the reads as we pass through them. + * @return Whether to verify the reads. + */ + public Boolean getSafetyChecking() { + return beSafe; + } + + /** + * Simple constructor for unit testing. + * @param readsFiles List of reads files to open. + */ + public Reads( List readsFiles ) { + this.readsFiles = readsFiles; + } + + /** + * Extract the command-line arguments having to do with reads input + * files and store them in an easy-to-work-with package. Constructor + * is package protected. + * @param arguments GATK parsed command-line arguments. + */ + Reads( GATKArgumentCollection arguments ) { + this.readsFiles = arguments.samFiles; + if (arguments.downsampleFraction != null) downsamplingFraction = arguments.downsampleFraction; + if (arguments.downsampleCoverage != null) downsampleToCoverage = arguments.downsampleCoverage; + if (arguments.maximumReadSorts != null) maxOnFlySorts = arguments.maximumReadSorts; + beSafe = !arguments.unsafe; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index e20600725..5b03fdcb5 100755 --- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -15,8 +15,11 @@ import java.util.Map; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.utils.JVMUtils; import org.broadinstitute.sting.utils.PathUtils; +import org.broadinstitute.sting.utils.StingException; import org.apache.log4j.Logger; /** @@ -95,10 +98,28 @@ public class WalkerManager { return (Walker) walker.newInstance(); } + /** + * Retrieves the walker class given a walker name. + * @param walkerName Name of the walker. + * @return Class representing the walker. + */ public Class getWalkerClassByName(String walkerName) { return walkers.get(walkerName); } + /** + * Gets the data source for the provided walker. + * @param walker The walker. + * @return Which type of data source to traverse over...reads or reference? + */ + public static DataSource getWalkerDataSource(Walker walker) { + Class walkerClass = walker.getClass(); + By byDataSource = walkerClass.getAnnotation(By.class); + if( byDataSource == null ) + throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName()); + return byDataSource.value(); + } + /** * Load classes internal to the classpath from an arbitrary location. * diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java index 851383eef..65312d289 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueue.java @@ -1,17 +1,9 @@ package org.broadinstitute.sting.gatk.dataSources.providers; -import net.sf.samtools.SAMRecord; - -import java.util.Iterator; import java.util.NoSuchElementException; -import edu.mit.broad.picard.filter.FilteringIterator; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; -import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; -import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.utils.GenomeLoc; /** * User: hanna @@ -30,24 +22,25 @@ import org.broadinstitute.sting.utils.GenomeLoc; * A LocusContextQueue over which the user can iterate. */ -public class IterableLocusContextQueue implements LocusContextQueue, LocusIterator { - private Shard shard; - private LocusContextIterator loci; - +public class IterableLocusContextQueue extends LocusContextQueue implements LocusIterator { /** * What's the context for the last locus accessed? * @param provider */ - private LocusContext nextLocusContext = null; + private LocusContext prefetched = null; + + /** + * Has this prefetch been consumed? If this flag is set, + * the prefetch will skip to the next argument in the system. + */ + private boolean prefetchConsumed = true; /** * Create a new queue of locus contexts. * @param provider */ public IterableLocusContextQueue(ShardDataProvider provider) { - Iterator reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc()); - this.loci = new LocusContextIteratorByHanger(reads); - this.shard = provider.getShard(); + super( provider ); } /** @@ -55,7 +48,8 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat * @return True if another locus present in this iterator. Otherwise, false. */ public boolean hasNext() { - return loci.hasNext(); + prefetchLocusContext(); + return prefetched != null; } /** @@ -63,12 +57,36 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat * @return Next element in the queue. */ public GenomeLoc next() { - do { - nextLocusContext = loci.next(); + prefetchLocusContext(); + prefetchConsumed = true; + // Signal that the prefetcher needs to grab another entry off the queue. + return prefetched.getLocation(); + } + + /** + * Find the next locus context within the bounds of a member variable and store + * it in the prefetched member variable. When the prefetch is consumed, the 'consumer' + * should signal it as such by marking prefetchConsumed = true. + */ + private void prefetchLocusContext() { + if( !prefetchConsumed ) + return; + + prefetched = null; + prefetchConsumed = false; + + // If another locus context bounded by this shard exists, find it. + boolean prefetchOutOfBounds = true; + while( hasNextLocusContext() && prefetchOutOfBounds ) { + prefetched = getNextLocusContext(); + prefetchOutOfBounds = (prefetched.getLocation().isBefore(shard.getGenomeLoc()) || + prefetched.getLocation().isPast(shard.getGenomeLoc())); } - while( nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) ); - - return nextLocusContext.getLocation(); + + // Can't find a valid prefetch? Set prefetch to null. If prefetched == null and + // prefetchConsumed == false, the queue is out of entries. + if( prefetchOutOfBounds ) + prefetched = null; } /** @@ -83,9 +101,9 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat * @return */ public LocusContext peek() { - if( nextLocusContext == null ) + if( prefetched == null ) throw new NoSuchElementException("No more elements remaining in queue"); - return nextLocusContext; + return prefetched; } /** @@ -93,10 +111,8 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat * @param seekPoint */ public LocusContextQueue seek( GenomeLoc seekPoint ) { - if( nextLocusContext == null || !seekPoint.equals(nextLocusContext.getLocation()) ) { - nextLocusContext = null; + if( prefetched == null || !seekPoint.equals(prefetched.getLocation()) ) throw new IllegalArgumentException("IterableLocusContextQueue doesn't support seeking and iterator is in the wrong position."); - } return this; } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java index 44142b501..ac3e6f8d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueue.java @@ -1,7 +1,17 @@ package org.broadinstitute.sting.gatk.dataSources.providers; import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; +import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMRecord; + +import java.util.Iterator; + +import edu.mit.broad.picard.filter.FilteringIterator; /** * User: hanna * Date: May 13, 2009 @@ -19,17 +29,50 @@ import org.broadinstitute.sting.utils.GenomeLoc; * A queue of locus context entries. */ -public interface LocusContextQueue { +public abstract class LocusContextQueue { + protected Shard shard; + + private Reads sourceInfo; + private LocusContextIterator loci; + + public LocusContextQueue(ShardDataProvider provider) { + Iterator reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc()); + this.loci = new LocusContextIteratorByHanger(reads); + this.sourceInfo = provider.getReadIterator().getSourceInfo(); + this.shard = provider.getShard(); + } + + /** * Get the locus context at the given position. * @return Locus context, or null if no locus context exists at this position. */ - LocusContext peek(); + public abstract LocusContext peek(); /** * Seek to the given point the queue of locus contexts. * @param target Target base pair to which to seek. Must be a single base pair. * @return an instance of itself for parameter chaining. */ - public LocusContextQueue seek(GenomeLoc target); + public abstract LocusContextQueue seek(GenomeLoc target); + + /** + * Gets the next locus context, applying filtering as necessary. + * @return Locus context to work with. + */ + protected LocusContext getNextLocusContext() { + LocusContext next = loci.next(); + if( sourceInfo.getDownsampleToCoverage() != null ) + next.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); + return next; + } + + /** + * hasNext()-style iterator for base iterator. + * @return + */ + protected boolean hasNextLocusContext() { + return loci.hasNext(); + } + } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java index 2009f597a..ad1628104 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/SeekableLocusContextQueue.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.dataSources.providers; import org.broadinstitute.sting.gatk.iterators.LocusContextIterator; import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger; import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.utils.GenomeLoc; @@ -31,10 +32,7 @@ import edu.mit.broad.picard.filter.FilteringIterator; * implementation of java.util.Queue interface. */ -public class SeekableLocusContextQueue implements LocusContextQueue { - private Shard shard; - private LocusContextIterator loci; - +public class SeekableLocusContextQueue extends LocusContextQueue { /** * Gets the position to which the last seek was requested. */ @@ -53,15 +51,13 @@ public class SeekableLocusContextQueue implements LocusContextQueue { * @param provider */ public SeekableLocusContextQueue(ShardDataProvider provider) { - Iterator reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc()); - this.loci = new LocusContextIteratorByHanger(reads); - this.shard = provider.getShard(); + super(provider); // Seed the state tracking members with the first possible seek position and the first possible locus context. seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart()); - if( loci.hasNext() ) - nextLocusContext = loci.next(); + if( hasNextLocusContext() ) + nextLocusContext = getNextLocusContext(); else nextLocusContext = this.createEmptyLocusContext(seekPoint); } @@ -94,15 +90,15 @@ public class SeekableLocusContextQueue implements LocusContextQueue { seekPoint = (GenomeLoc)target.clone(); // Search for the next locus context following the target positions. - while (nextLocusContext.getLocation().isBefore(target) && loci.hasNext() ) { + while (nextLocusContext.getLocation().isBefore(target) && hasNextLocusContext() ) { logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(), target, nextLocusContext.getLocation().compareTo(target))); - nextLocusContext = loci.next(); + nextLocusContext = getNextLocusContext(); } // Couldn't find a next? Force the nextLocusContext to null. - if( nextLocusContext.getLocation().isBefore(target) && !loci.hasNext() ) + if( nextLocusContext.getLocation().isBefore(target) && !hasNextLocusContext() ) nextLocusContext = createEmptyLocusContext( seekPoint ); return this; diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java index cd72550af..0d3808781 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java @@ -11,6 +11,9 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -32,6 +35,11 @@ import java.util.List; * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ public class SAMDataSource implements SimpleDataSource { + /** + * Backing support for reads. + */ + private Reads reads = null; + /** our SAM data files */ private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate; @@ -65,25 +73,18 @@ public class SAMDataSource implements SimpleDataSource { /** * constructor, given sam files * - * @param samFiles the list of sam files + * @param reads the list of sam files */ - public SAMDataSource(List samFiles) throws SimpleDataSourceLoadException { + public SAMDataSource(Reads reads) throws SimpleDataSourceLoadException { + this.reads = reads; + // check the length - if (samFiles.size() < 1) { + if (reads.getReadsFiles().size() < 1) { throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0"); } - for (Object fileName : samFiles) { - File smFile; - if (samFiles.get(0) instanceof String) { - smFile = new File((String) samFiles.get(0)); - } else if (samFiles.get(0) instanceof File) { - smFile = (File) fileName; - } else { - throw new SimpleDataSourceLoadException("SAMDataSource: unknown samFile list type, must be String or File"); - } - + for (File smFile : reads.getReadsFiles()) { if (!smFile.canRead()) { - throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + fileName); + throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName()); } samFileList.add(smFile); @@ -120,7 +121,7 @@ public class SAMDataSource implements SimpleDataSource { * @param location the genome location to extract data for * @return an iterator for that region */ - public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException { + public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException { // right now this is pretty damn heavy, it copies the file list into a reader list every time SamFileHeaderMerger headerMerger = createHeaderMerger(); @@ -132,7 +133,7 @@ public class SAMDataSource implements SimpleDataSource { iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1); // return the iterator - return iter; + return StingSAMIteratorAdapter.adapt( reads, iter ); } /** @@ -144,13 +145,26 @@ public class SAMDataSource implements SimpleDataSource { * @return an iterator for that region */ public StingSAMIterator seek(Shard shard) throws SimpleDataSourceLoadException { + StingSAMIterator iterator = null; if (shard.getShardType() == Shard.ShardType.READ) { - return seekRead((ReadShard) shard); + iterator = seekRead((ReadShard) shard); + iterator = TraversalEngine.applyDecoratingIterators(true, + iterator, + reads.getDownsamplingFraction(), + reads.getMaxOnTheFlySorts(), + reads.getSafetyChecking()); } else if (shard.getShardType() == Shard.ShardType.LOCUS) { - return seekLocus(shard.getGenomeLoc()); + iterator = seekLocus(shard.getGenomeLoc()); + iterator = TraversalEngine.applyDecoratingIterators(false, + iterator, + reads.getDownsamplingFraction(), + reads.getMaxOnTheFlySorts(), + reads.getSafetyChecking()); } else { throw new StingException("seek: Unknown shard type"); } + + return iterator; } @@ -204,7 +218,7 @@ public class SAMDataSource implements SimpleDataSource { if (bound == null) { shard.signalDone(); - bound = new BoundedReadIterator(iter, 0); + bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), 0); } return bound; } @@ -258,7 +272,7 @@ public class SAMDataSource implements SimpleDataSource { // we're good, increment our read cout this.readsTaken += readCount; - return new BoundedReadIterator(iter, readCount); + return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount); } @@ -276,7 +290,7 @@ public class SAMDataSource implements SimpleDataSource { if (lastReadPos == null) { lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0); iter.queryContained(lastReadPos.getContig(), 1, -1); - bound = new BoundedReadIterator(iter, readCount); + bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount); this.readsTaken = readCount; } // we're not at the beginning, not at the end, so we move forward with our ghastly plan... @@ -324,7 +338,7 @@ public class SAMDataSource implements SimpleDataSource { SamFileHeaderMerger mg = createHeaderMerger(); iter = new MergingSamRecordIterator2(mg); iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE); - return new BoundedReadIterator(iter,readCount); + return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter),readCount); } } } @@ -349,7 +363,7 @@ public class SAMDataSource implements SimpleDataSource { throw new StingException("Danger: weve run out reads in fastMappedReadSeek"); //return null; } - bound = new BoundedReadIterator(iter, readCount); + bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount); } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 0c6560e31..c419ba84c 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.OutputTracker; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.utils.GenomeLoc; @@ -58,7 +59,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Reduce * @param refFile Reference for driving the traversal. * @param nThreadsToUse maximum number of threads to use to do the work */ - protected HierarchicalMicroScheduler( Walker walker, List reads, File refFile, List> rods, int nThreadsToUse ) { + protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List> rods, int nThreadsToUse ) { super( walker, reads, refFile, rods ); this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); if( !(traversalEngine instanceof TraverseLociByReference) ) diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 8da7618c0..d01f5afcc 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import java.io.File; @@ -20,7 +21,7 @@ public class LinearMicroScheduler extends MicroScheduler { * @param reads Reads file(s) to process. * @param refFile Reference for driving the traversal. */ - protected LinearMicroScheduler( Walker walker, List reads, File refFile, List> rods ) { + protected LinearMicroScheduler( Walker walker, Reads reads, File refFile, List> rods ) { super(walker, reads, refFile, rods); } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 55b9b540b..5a198e416 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; @@ -51,7 +52,7 @@ public abstract class MicroScheduler { * @param nThreadsToUse Number of threads to utilize. * @return The best-fit microscheduler. */ - public static MicroScheduler create( Walker walker, List reads, File ref, List> rods, int nThreadsToUse ) { + public static MicroScheduler create( Walker walker, Reads reads, File ref, List> rods, int nThreadsToUse ) { if( walker instanceof TreeReducible && nThreadsToUse > 1 ) { logger.info("Creating hierarchical microscheduler"); return new HierarchicalMicroScheduler( walker, reads, ref, rods, nThreadsToUse ); @@ -67,11 +68,11 @@ public abstract class MicroScheduler { * @param reads The reads. * @param refFile File pointer to the reference. */ - protected MicroScheduler( Walker walker, List reads, File refFile, List> rods ) { + protected MicroScheduler( Walker walker, Reads reads, File refFile, List> rods ) { if (walker instanceof ReadWalker) { - traversalEngine = new TraverseReads(reads, refFile, rods); + traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods); } else { - traversalEngine = new TraverseLociByReference(reads, refFile, rods); + traversalEngine = new TraverseLociByReference(reads.getReadsFiles(), refFile, rods); } this.reads = getReadsDataSource( reads ); @@ -132,16 +133,8 @@ public abstract class MicroScheduler { * Gets a data source for the given set of reads. * @return A data source for the given set of reads. */ - private SAMDataSource getReadsDataSource( List reads ) { - List unpackedReads = null; - try { - unpackedReads = TraversalEngine.unpackReads(reads); - } - catch( FileNotFoundException ex ) { - throw new StingException( "Cannot unpack list of reads files", ex ); - } - - SAMDataSource dataSource = new SAMDataSource( unpackedReads ); + private SAMDataSource getReadsDataSource( Reads reads ) { + SAMDataSource dataSource = new SAMDataSource( reads ); // Side effect: initialize the traversal engine with reads data. // TODO: Give users a dedicated way of getting the header so that the MicroScheduler diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index b1ed23076..9f8d9c8fb 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -5,6 +5,8 @@ import net.sf.samtools.SAMRecord; import java.util.Iterator; +import org.broadinstitute.sting.gatk.Reads; + /** * * User: aaron @@ -68,6 +70,14 @@ public class BoundedReadIterator implements StingSAMIterator { this.doNotUseThatUnmappedReadPile = useThem; } + /** + * Retrieves information about reads sources. + * @return Info about the sources of reads. + */ + public Reads getSourceInfo() { + return iterator.getSourceInfo(); + } + public SAMFileHeader getHeader() { // todo: this is bad, we need an iterface out there for samrecords that supports getting the header, diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java index f92430dfe..78fa540c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/DownsampleIterator.java @@ -5,6 +5,8 @@ import net.sf.samtools.SAMRecord; import java.util.Iterator; import java.util.Random; +import org.broadinstitute.sting.gatk.Reads; + public class DownsampleIterator implements StingSAMIterator { @@ -20,6 +22,15 @@ public class DownsampleIterator implements StingSAMIterator { next = getNextRecord(); } + /** + * Retrieves information about reads sources. + * @return Info about the sources of reads. + */ + public Reads getSourceInfo() { + return it.getSourceInfo(); + } + + public boolean hasNext() { return next != null; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index 6b10e6240..fb238b55a 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -15,7 +15,10 @@ import edu.mit.broad.picard.sam.ReservedTagConstants; import edu.mit.broad.picard.sam.SamFileHeaderMerger; import edu.mit.broad.picard.util.PeekableIterator; import net.sf.samtools.*; +import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.utils.StingException; import java.lang.reflect.Constructor; import java.util.Comparator; @@ -28,7 +31,7 @@ import java.util.PriorityQueue; * iterable stream. The underlying iterators/files must all have the same sort order unless * the requested output format is unsorted, in which case any combination is valid. */ -public class MergingSamRecordIterator2 implements StingSAMIterator { +public class MergingSamRecordIterator2 implements CloseableIterator, Iterable { protected PriorityQueue pq = null; protected final SamFileHeaderMerger samHeaderMerger; protected final SAMFileHeader.SortOrder sortOrder; @@ -272,6 +275,7 @@ public class MergingSamRecordIterator2 implements StingSAMIterator { // Should replace picard class with the same name class ComparableSamRecordIterator extends PeekableIterator implements Comparable, StingSAMIterator { + private Reads sourceInfo; private final Comparator comparator; private final SAMFileReader reader; @@ -295,6 +299,12 @@ class ComparableSamRecordIterator extends PeekableIterator implements this.comparator = comparator; } + public Reads getSourceInfo() { + if( sourceInfo == null ) + throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); + return sourceInfo; + } + /** Returns the reader from which this iterator was constructed. */ public SAMFileReader getReader() { return reader; diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java index de0592c83..9c4760970 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.iterators; import org.broadinstitute.sting.utils.ComparableSAMRecord; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.gatk.Reads; import net.sf.samtools.SAMRecord; @@ -11,11 +13,11 @@ import java.util.Iterator; // TODO: Deprecate? // I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron public class SortSamIterator implements StingSAMIterator { + private Reads sourceInfo; + private Iterator it; - Iterator it; - - public SortSamIterator(Iterator unsortedIter, int maxSorts) { - + public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) { + sourceInfo = unsortedIter.getSourceInfo(); ArrayList list = new ArrayList(); while (unsortedIter.hasNext()) { list.add(new ComparableSAMRecord(unsortedIter.next())); @@ -27,6 +29,16 @@ public class SortSamIterator implements StingSAMIterator { it = list.iterator(); } + /** + * Retrieves information about reads sources. + * @return Info about the sources of reads. + */ + public Reads getSourceInfo() { + if( sourceInfo == null ) + throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); + return sourceInfo; + } + public boolean hasNext() { return it.hasNext(); } public SAMRecord next() { return it.next().getRecord(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java index 1b248d097..499f41d76 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIterator.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; +import org.broadinstitute.sting.gatk.Reads; /** * * User: aaron @@ -28,4 +29,10 @@ import net.sf.samtools.util.CloseableIterator; * This is the standard interface for all iterators in the Sting package that iterate over SAMRecords */ public interface StingSAMIterator extends CloseableIterator, Iterable { + /** + * Gets source information for the reads. Contains information about the original reads + * files, plus information about downsampling, etc. + * @return + */ + public Reads getSourceInfo(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java index 724d39b2f..4ecbd64dd 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapter.java @@ -5,6 +5,9 @@ import net.sf.samtools.util.CloseableIterator; import java.util.Iterator; +import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.utils.StingException; + /** * * User: aaron @@ -31,14 +34,14 @@ import java.util.Iterator; *

* This class adapts other SAMRecord iterators to the StingSAMIterator */ -public class StingSAMIteratorAdapter { +public class StingSAMIteratorAdapter { - public static StingSAMIterator adapt(Iterator iter) { - return new PrivateStringSAMIterator(iter); + public static StingSAMIterator adapt(Reads sourceInfo, Iterator iter) { + return new PrivateStringSAMIterator(sourceInfo, iter); } - public static StingSAMIterator adapt(CloseableIterator iter) { - return new PrivateStringSAMCloseableIterator(iter); + public static StingSAMIterator adapt(Reads sourceInfo, CloseableIterator iter) { + return new PrivateStringSAMCloseableIterator(sourceInfo, iter); } } @@ -49,12 +52,20 @@ public class StingSAMIteratorAdapter { * methods that implement the iterable<> interface and the close() method from CloseableIterator */ class PrivateStringSAMIterator implements StingSAMIterator { + private Reads sourceInfo = null; private Iterator iter = null; - PrivateStringSAMIterator(Iterator iter) { + PrivateStringSAMIterator(Reads sourceInfo, Iterator iter) { + this.sourceInfo = sourceInfo; this.iter = iter; } + public Reads getSourceInfo() { + if( sourceInfo == null ) + throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); + return sourceInfo; + } + public void close() { // do nothing, we can't close the iterator anyway. } @@ -82,12 +93,20 @@ class PrivateStringSAMIterator implements StingSAMIterator { * methods that implement the iterable<> interface. */ class PrivateStringSAMCloseableIterator implements StingSAMIterator { + private Reads sourceInfo = null; private CloseableIterator iter = null; - PrivateStringSAMCloseableIterator(CloseableIterator iter) { + PrivateStringSAMCloseableIterator(Reads sourceInfo, CloseableIterator iter) { + this.sourceInfo = sourceInfo; this.iter = iter; } + public Reads getSourceInfo() { + if( sourceInfo == null ) + throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); + return sourceInfo; + } + public void close() { iter.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java index 4b0e341f7..2f96cd463 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.RuntimeIOException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.Reads; import java.util.Iterator; @@ -23,6 +24,15 @@ public class VerifyingSamIterator implements StingSAMIterator { this.it = it; } + /** + * Retrieves information about reads sources. + * @return Info about the sources of reads. + */ + public Reads getSourceInfo() { + return it.getSourceInfo(); + } + + public boolean hasNext() { return this.it.hasNext(); } public SAMRecord next() { diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 8fa7c61c7..3f2da87e8 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; @@ -131,11 +132,11 @@ public abstract class TraversalEngine { this.walkOverAllSites = walkOverAllSites; } - public void setDebugging(final boolean d) { + private void setDebugging(final boolean d) { DEBUGGING = d; } - public void setSafetyChecking(final boolean beSafeP) { + private void setSafetyChecking(final boolean beSafeP) { if (!beSafeP) logger.warn("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages..."); this.beSafeP = beSafeP; @@ -147,26 +148,39 @@ public abstract class TraversalEngine { this.FILTER_UNSORTED_READS = filterUnsorted; } - public void setSortOnFly(final int maxReadsToSort) { + private void setSortOnFly(final int maxReadsToSort) { logger.info("Sorting read file on the fly: max reads allowed is " + maxReadsToSort); SORT_ON_FLY = true; maxOnFlySorts = maxReadsToSort; } - public void setSortOnFly() { setSortOnFly(100000); } + private void setSortOnFly() { setSortOnFly(100000); } - public void setDownsampleByFraction(final double fraction) { + private void setDownsampleByFraction(final double fraction) { logger.info("Downsampling to approximately " + (fraction * 100.0) + "% of filtered reads"); DOWNSAMPLE_BY_FRACTION = true; downsamplingFraction = fraction; } - public void setDownsampleByCoverage(final int coverage) { + private void setDownsampleByCoverage(final int coverage) { logger.info("Downsampling to coverage " + coverage); DOWNSAMPLE_BY_COVERAGE = true; downsamplingCoverage = coverage; } + /** + * Sets up read filtering performed by the traversal engine. + * @param reads Read filters to instantiate as the traversal engine walks over the data. + * @deprecated Should only be used by old-style traversals. + */ + @Deprecated + public void setReadFilters( Reads reads) { + if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction()); + if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage()); + if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts()); + if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking()); + } + // -------------------------------------------------------------------------------------------------------------- // // functions for dealing locations (areas of the genome we're traversing over) @@ -318,36 +332,9 @@ public abstract class TraversalEngine { return true; } - /** - * Unpack the files to be processed, given a list of files. That list of files can - * itself contain lists of other files to be read. - * @param inputFiles - * @return - */ - public static List unpackReads( List inputFiles ) throws FileNotFoundException { - List unpackedReads = new ArrayList(); - for( File inputFile: inputFiles ) { - if( inputFile.getName().endsWith(".list") ) { - for( String fileName : new xReadLines(inputFile) ) - unpackedReads.add( new File(fileName) ); - } - else - unpackedReads.add( inputFile ); - } - return unpackedReads; - } - protected Iterator initializeReads() { - List allReadsFiles = null; - try { - allReadsFiles = unpackReads( readsFiles ); - } - catch( FileNotFoundException ex ) { - throw new RuntimeException("Unable to unpack reads", ex ); - } - - if( allReadsFiles.size() == 1 ) - samReader = initializeSAMFile(allReadsFiles.get(0)); + if( readsFiles.size() == 1 ) + samReader = initializeSAMFile(readsFiles.get(0)); else samReader = null; return wrapReadsIterator(getReadsIterator(samReader), true); @@ -358,16 +345,9 @@ public abstract class TraversalEngine { if ( samReader == null && readsFiles.size() > 0 ) { SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate; - List allReadsFiles = null; List readers = new ArrayList(); - try { - allReadsFiles = unpackReads( readsFiles ); - } - catch(FileNotFoundException ex) { - throw new RuntimeException("Unable to unpack reads", ex); - } - for ( File readsFile: allReadsFiles ) { + for ( File readsFile: readsFiles ) { SAMFileReader reader = initializeSAMFile(readsFile); readers.add(reader); } @@ -382,28 +362,52 @@ public abstract class TraversalEngine { @Deprecated protected StingSAMIterator wrapReadsIterator( final Iterator rawIterator, final boolean enableVerification ) { - StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(rawIterator); - wrappedIterator = applyDecoratingIterators(enableVerification, wrappedIterator); - + // Reads sourceInfo is gone by this point in the traversal engine. Stub in a null and rely on the iterator to + // throw an exception if reads info isn't present. + StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator); + wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator); if (THREADED_IO) { logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE)); - wrappedIterator = StingSAMIteratorAdapter.adapt(new ThreadedIterator(wrappedIterator, THREADED_IO_BUFFER_SIZE)); + wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator(wrappedIterator, THREADED_IO_BUFFER_SIZE)); } return wrappedIterator; } - protected StingSAMIterator applyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator) { + /** + * Repackage instance variables and call static method. + * TODO: This method's days are numbered. + * @param enableVerification + * @param wrappedIterator + * @return + */ + protected StingSAMIterator applyDecoratingIterators( final boolean enableVerification, final StingSAMIterator wrappedIterator ) { + return applyDecoratingIterators(enableVerification, + wrappedIterator, + DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null, + SORT_ON_FLY ? maxOnFlySorts : null, + beSafeP); + } + + /** + * WARNING: In TraversalEngine for backward compatibility ONLY. Reads are not used as the data source, only as parameters + * for validation. + */ + public static StingSAMIterator applyDecoratingIterators(boolean enableVerification, + StingSAMIterator wrappedIterator, + Double downsamplingFraction, + Integer maxOnFlySorts, + Boolean beSafeP) { // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away - if (DOWNSAMPLE_BY_FRACTION) + if (downsamplingFraction != null) wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); - if (SORT_ON_FLY) + if (maxOnFlySorts != null) wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts); - if (beSafeP && enableVerification) + if (beSafeP != null && beSafeP && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); return wrappedIterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociByReference.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociByReference.java index a79258c47..84a9efeb5 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociByReference.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociByReference.java @@ -2,11 +2,15 @@ package org.broadinstitute.sting.gatk.traversals; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue; +import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue; +import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,7 +37,6 @@ public class TraverseLociByReference extends TraversalEngine { */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - public TraverseLociByReference(List reads, File ref, List> rods) { super( reads, ref, rods ); } @@ -57,8 +60,23 @@ public class TraverseLociByReference extends TraversalEngine { LocusWalker locusWalker = (LocusWalker)walker; - LocusIterator locusIterator = new ReferenceLocusIterator( dataProvider ); - SeekableLocusContextQueue locusContextQueue = new SeekableLocusContextQueue( dataProvider ); + LocusIterator locusIterator = null; + LocusContextQueue locusContextQueue = null; + + DataSource dataSource = WalkerManager.getWalkerDataSource(walker); + switch( dataSource ) { + case REFERENCE: + locusIterator = new ReferenceLocusIterator( dataProvider ); + locusContextQueue = new SeekableLocusContextQueue( dataProvider ); + break; + case READS: + IterableLocusContextQueue iterableQueue = new IterableLocusContextQueue( dataProvider ); + locusIterator = iterableQueue; + locusContextQueue = iterableQueue; + break; + default: + throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource); + } // We keep processing while the next reference location is within the interval while( locusIterator.hasNext() ) { @@ -72,9 +90,6 @@ public class TraverseLociByReference extends TraversalEngine { LocusContext locus = locusContextQueue.seek( site ).peek(); char refBase = dataProvider.getReferenceBase( site ); - if ( DOWNSAMPLE_BY_COVERAGE ) - locus.downsampleToCoverage(downsamplingCoverage); - final boolean keepMeP = locusWalker.filter(tracker, refBase, locus); if (keepMeP) { M x = locusWalker.map(tracker, refBase, locus); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index d822597f4..4ffc4685b 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -88,11 +87,8 @@ public class TraverseReads extends TraversalEngine { ReadWalker readWalker = (ReadWalker) walker; - // we allow a bunch of wrapping iterators for down sampling, threadingIO, etc. - StingSAMIterator it = applyDecoratingIterators(true, dataProvider.getReadIterator()); - // while we still have more reads - for (SAMRecord read : it) { + for (SAMRecord read : dataProvider.getReadIterator()) { // our locus context LocusContext locus = null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/By.java b/java/src/org/broadinstitute/sting/gatk/walkers/By.java new file mode 100755 index 000000000..25455b587 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/By.java @@ -0,0 +1,33 @@ +package org.broadinstitute.sting.gatk.walkers; + +import java.lang.annotation.Documented; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import java.lang.annotation.ElementType; +/** + * User: hanna + * Date: May 14, 2009 + * Time: 1:51:22 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Allows the walker to indicate what type of data it wants to consume. + */ + +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface By { + DataSource value(); +} + diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java b/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java new file mode 100755 index 000000000..e60fe3d62 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.gatk.walkers; +/** + * User: hanna + * Date: May 14, 2009 + * Time: 2:12:33 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Allow user to choose between a number of different data sources. + */ +public enum DataSource { + READS, + REFERENCE +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 2c186534b..83565f532 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -1,11 +1,8 @@ package org.broadinstitute.sting.gatk.walkers; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.LocusContext; -import java.util.List; - /** * Created by IntelliJ IDEA. * User: mdepristo @@ -13,6 +10,7 @@ import java.util.List; * Time: 2:52:28 PM * To change this template use File | Settings | File Templates. */ +@By(DataSource.READS) public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintLocusContextWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintLocusContextWalker.java index 12a48f659..0e1646268 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintLocusContextWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintLocusContextWalker.java @@ -17,6 +17,7 @@ import net.sf.samtools.SAMRecord; * Time: 11:23:14 AM * To change this template use File | Settings | File Templates. */ +@By(DataSource.REFERENCE) public class PrintLocusContextWalker extends LocusWalker implements TreeReducible { public LocusContext map(RefMetaDataTracker tracker, char ref, LocusContext context) { out.printf( "In map: ref = %c, loc = %s, reads = %s%n", ref, diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java index 3787d9d72..dfd6a8527 100755 --- a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java +++ b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java @@ -82,9 +82,9 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase(); collect.enabledThreadedIO = true; collect.unsafe = false; - collect.maximumReadSorts = "maximumReadSorts".toLowerCase(); - collect.downsampleFraction = "downsampleFraction".toLowerCase(); - collect.downsampleCoverage = "downsampleCoverage".toLowerCase(); + collect.maximumReadSorts = null; + collect.downsampleFraction = null; + collect.downsampleCoverage = null; collect.intervals = "intervals".toLowerCase(); collect.walkAllLoci = true; collect.disableThreading = false; diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java index 41c69529a..c4aa60a3e 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/IterableLocusContextQueueTest.java @@ -84,7 +84,8 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate { if(new GenomeLoc(read).containsP(locusContext.getLocation())) Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) ); } - } + + Assert.assertFalse("Iterator is not bounded at boundaries of shard", iterableQueue.hasNext()); } } diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java index aee5e39d6..f26daede2 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/providers/LocusContextQueueTemplate.java @@ -7,8 +7,10 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard; +import org.broadinstitute.sting.gatk.Reads; import java.io.FileNotFoundException; +import java.io.File; import java.util.Collections; import java.util.List; import java.util.Iterator; @@ -294,6 +296,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest { backingIterator = backingList.iterator(); } + public Reads getSourceInfo() { + // There are no sources for these reads. + return new Reads(new ArrayList()); + } + public boolean hasNext() { return backingIterator.hasNext(); } diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java index c66d4c08a..8b192feda 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.junit.After; @@ -45,7 +47,7 @@ import java.util.List; */ public class SAMBAMDataSourceTest extends BaseTest { - private List fl; + private List fl; private FastaSequenceFile2 seq; /** @@ -55,7 +57,7 @@ public class SAMBAMDataSourceTest extends BaseTest { */ @Before public void doForEachTest() { - fl = new ArrayList(); + fl = new ArrayList(); // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); @@ -83,18 +85,18 @@ public class SAMBAMDataSourceTest extends BaseTest { int count = 0; // setup the data - fl.add(oneKGLocation + "/pilot3/sams/NA12892.bam"); - + fl.add(new File(oneKGLocation + "/pilot3/sams/NA12892.bam")); + Reads reads = new Reads(fl); try { - SAMDataSource data = new SAMDataSource(fl); + SAMDataSource data = new SAMDataSource(reads); for (Shard sh : strat) { int readCount = 0; count++; logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig()); logger.debug("count = " + count); - MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh); + StingSAMIterator datum = data.seek(sh); // for the first couple of shards make sure we can see the reads if (count < 5) { @@ -126,7 +128,8 @@ public class SAMBAMDataSourceTest extends BaseTest { // setup the test files - fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"); + fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam")); + Reads reads = new Reads(fl); ArrayList readcountPerShard = new ArrayList(); ArrayList readcountPerShard2 = new ArrayList(); @@ -136,7 +139,7 @@ public class SAMBAMDataSourceTest extends BaseTest { int count = 0; try { - SAMDataSource data = new SAMDataSource(fl); + SAMDataSource data = new SAMDataSource(reads); for (Shard sh : strat) { int readCount = 0; count++; @@ -144,7 +147,7 @@ public class SAMBAMDataSourceTest extends BaseTest { break; } - MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh); + StingSAMIterator datum = data.seek(sh); for (SAMRecord r : datum) { readCount++; @@ -163,15 +166,17 @@ public class SAMBAMDataSourceTest extends BaseTest { // setup the data and the counter before our second run fl.clear(); - fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-01A-01W.aligned.duplicates_marked.bam"); - fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-10B-01W.aligned.duplicates_marked.bam"); + fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-01A-01W.aligned.duplicates_marked.bam")); + fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-10B-01W.aligned.duplicates_marked.bam")); + reads = new Reads(fl); + count = 0; // the sharding strat. strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000); logger.debug("Pile two:"); try { - SAMDataSource data = new SAMDataSource(fl); + SAMDataSource data = new SAMDataSource(reads); for (Shard sh : strat) { int readCount = 0; count++; @@ -181,7 +186,7 @@ public class SAMBAMDataSourceTest extends BaseTest { break; } - MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh); + StingSAMIterator datum = data.seek(sh); for (SAMRecord r : datum) { readCount++; diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java index d0a908567..193ea0f42 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import static org.junit.Assert.assertEquals; @@ -46,7 +47,7 @@ import java.util.List; public class SAMByReadsTest extends BaseTest { private FastaSequenceFile2 seq; - private List fl; + private List fl; /** * This function does the setup of our parser, before each method call. @@ -55,7 +56,7 @@ public class SAMByReadsTest extends BaseTest { */ @Before public void doForEachTest() { - fl = new ArrayList(); + fl = new ArrayList(); // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta")); @@ -69,13 +70,15 @@ public class SAMByReadsTest extends BaseTest { logger.warn("Executing testTotalReadCount"); // setup the test files - fl.add("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"); + fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam")); + Reads reads = new Reads(fl); + final int targetReadCount = 5000; ShardStrategy shardStrategy = ShardStrategyFactory.shatterByReadCount(seq.getSequenceDictionary(),targetReadCount); try { - SAMDataSource data = new SAMDataSource(fl); + SAMDataSource data = new SAMDataSource(reads); // check the total read count final int totalReads = 10000; diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java index e7ea7cb97..40db61131 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import static org.junit.Assert.assertEquals; @@ -48,7 +49,7 @@ import java.util.List; public class BoundedReadIteratorTest extends BaseTest { /** the file list and the fasta sequence */ - private List fl; + private List fl; private FastaSequenceFile2 seq; /** @@ -58,7 +59,7 @@ public class BoundedReadIteratorTest extends BaseTest { */ @Before public void doForEachTest() { - fl = new ArrayList(); + fl = new ArrayList(); // sequence seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); @@ -76,14 +77,15 @@ public class BoundedReadIteratorTest extends BaseTest { // setup the test files - fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"); + fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam")); + Reads reads = new Reads(fl); // our target read final long boundedReadCount = 100; long shardReadCount = 0; try { - SAMDataSource data = new SAMDataSource(fl); + SAMDataSource data = new SAMDataSource(reads); // make sure we have a shard if (!strat.hasNext()) { @@ -92,8 +94,8 @@ public class BoundedReadIteratorTest extends BaseTest { Shard sd = strat.next(); - MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sd); - MergingSamRecordIterator2 datum2 = (MergingSamRecordIterator2)data.seek(sd); + StingSAMIterator datum = data.seek(sd); + StingSAMIterator datum2 = data.seek(sd); // check the reads in the shard for (SAMRecord r : datum) { @@ -102,7 +104,7 @@ public class BoundedReadIteratorTest extends BaseTest { } // create the bounded iterator - BoundedReadIterator iter = new BoundedReadIterator(datum2, boundedReadCount); + BoundedReadIterator iter = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,datum2), boundedReadCount); // now see how many reads are in the bounded iterator int readCount = 0; diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterTest.java index 5f022162b..92ce212ac 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/StingSAMIteratorAdapterTest.java @@ -97,7 +97,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest { final int COUNT = 100; MyTestIterator it = new MyTestIterator(); - StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it); + StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it); int countCheck = 0; while (samIt.hasNext()) { samIt.next(); @@ -116,7 +116,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest { MyTestCloseableIterator it = new MyTestCloseableIterator(); - StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it); + StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it); int countCheck = 0; while (samIt.hasNext()) { @@ -133,7 +133,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest { MyTestCloseableIterator it = new MyTestCloseableIterator(); - StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it); + StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it); int countCheck = 0; diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index 7c0bdc08a..98c62ffe3 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; @@ -121,15 +122,7 @@ public class TraverseReadsTest extends BaseTest { ref.getSequenceDictionary(), readSize); - List unpackedReads = null; - try { - unpackedReads = TraversalEngine.unpackReads(bamList); - } - catch (FileNotFoundException ex) { - throw new RuntimeException(ex); - } - - SAMDataSource dataSource = new SAMDataSource(unpackedReads); + SAMDataSource dataSource = new SAMDataSource(new Reads(bamList)); dataSource.viewUnmappedReads(false); countReadWalker.initialize(); @@ -176,15 +169,8 @@ public class TraverseReadsTest extends BaseTest { ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, ref.getSequenceDictionary(), readSize); - List unpackedReads = null; - try { - unpackedReads = TraversalEngine.unpackReads(bamList); - } - catch (FileNotFoundException ex) { - throw new RuntimeException(ex); - } - SAMDataSource dataSource = new SAMDataSource(unpackedReads); + SAMDataSource dataSource = new SAMDataSource(new Reads(bamList)); dataSource.viewUnmappedReads(true); countReadWalker.initialize();