Switch TraverseByLoci over to new sharding system, and cleanup some code in passing read files along
the pathway from command line to traversal engine. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@727 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
57e5f22987
commit
2c4de7b5c5
|
|
@ -1,11 +1,18 @@
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.xReadLines;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -80,6 +87,9 @@ public class CommandLineGATK extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
loadArgumentsIntoObject(argCollection);
|
loadArgumentsIntoObject(argCollection);
|
||||||
loadArgumentsIntoObject(mWalker);
|
loadArgumentsIntoObject(mWalker);
|
||||||
|
|
||||||
|
processArguments(argCollection);
|
||||||
|
|
||||||
this.argCollection.analysisName = this.analysisName;
|
this.argCollection.analysisName = this.analysisName;
|
||||||
try {
|
try {
|
||||||
GATKEngine = new GenomeAnalysisEngine(argCollection, mWalker);
|
GATKEngine = new GenomeAnalysisEngine(argCollection, mWalker);
|
||||||
|
|
@ -132,4 +142,38 @@ public class CommandLineGATK extends CommandLineProgram {
|
||||||
this.argCollection = argCollection;
|
this.argCollection = argCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Preprocess the arguments before submitting them to the GATK engine.
|
||||||
|
* @param argCollection Collection of arguments to preprocess.
|
||||||
|
*/
|
||||||
|
private void processArguments( GATKArgumentCollection argCollection ) {
|
||||||
|
argCollection.samFiles = unpackReads( argCollection.samFiles );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unpack the files to be processed, given a list of files. That list of files can
|
||||||
|
* itself contain lists of other files to be read.
|
||||||
|
* @param inputFiles
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private List<File> unpackReads( List<File> inputFiles ) {
|
||||||
|
List<File> unpackedReads = new ArrayList<File>();
|
||||||
|
for( File inputFile: inputFiles ) {
|
||||||
|
if( inputFile.getName().endsWith(".list") ) {
|
||||||
|
try {
|
||||||
|
for( String fileName : new xReadLines(inputFile) )
|
||||||
|
unpackedReads.add( new File(fileName) );
|
||||||
|
}
|
||||||
|
catch( FileNotFoundException ex ) {
|
||||||
|
throw new StingException("Unable to find file while unpacking reads", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
unpackedReads.add( inputFile );
|
||||||
|
}
|
||||||
|
return unpackedReads;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -96,15 +96,15 @@ public class GATKArgumentCollection {
|
||||||
|
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
||||||
public String maximumReadSorts = null;
|
public Integer maximumReadSorts = null;
|
||||||
|
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
||||||
public String downsampleFraction = null;
|
public Double downsampleFraction = null;
|
||||||
|
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false)
|
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to", required = false)
|
||||||
public String downsampleCoverage = null;
|
public Integer downsampleCoverage = null;
|
||||||
|
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
||||||
|
|
@ -238,13 +238,16 @@ public class GATKArgumentCollection {
|
||||||
if (!other.unsafe.equals(this.unsafe)) {
|
if (!other.unsafe.equals(this.unsafe)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.maximumReadSorts.equals(this.maximumReadSorts)) {
|
if ((other.maximumReadSorts == null && this.maximumReadSorts != null) ||
|
||||||
|
(other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.downsampleFraction.equals(this.downsampleFraction)) {
|
if ((other.downsampleFraction == null && this.downsampleFraction != null) ||
|
||||||
|
(other.downsampleFraction != null && !other.downsampleFraction.equals(this.downsampleFraction))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.downsampleCoverage.equals(this.downsampleCoverage)) {
|
if ((other.downsampleCoverage == null && this.downsampleCoverage != null) ||
|
||||||
|
(other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
|
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ public class GenomeAnalysisEngine {
|
||||||
|
|
||||||
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
||||||
// branching based on the input data, but this should disapear when all the traversals are switched over
|
// branching based on the input data, but this should disapear when all the traversals are switched over
|
||||||
if ((my_walker instanceof LocusWalker && argCollection.walkAllLoci && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) ||
|
if ((my_walker instanceof LocusWalker && !(argCollection.samFiles == null || argCollection.samFiles.size() == 0)) ||
|
||||||
my_walker instanceof ReadWalker) {
|
my_walker instanceof ReadWalker) {
|
||||||
microScheduler = createMicroscheduler(my_walker, rods);
|
microScheduler = createMicroscheduler(my_walker, rods);
|
||||||
} else { // we have an old style traversal, once we're done return
|
} else { // we have an old style traversal, once we're done return
|
||||||
|
|
@ -181,13 +181,15 @@ public class GenomeAnalysisEngine {
|
||||||
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles));
|
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", argCollection.analysisName, argCollection.samFiles));
|
||||||
|
|
||||||
// create the MicroScheduler
|
// create the MicroScheduler
|
||||||
microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
if( argCollection.walkAllLoci )
|
||||||
|
Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal.");
|
||||||
|
microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||||
engine = microScheduler.getTraversalEngine();
|
engine = microScheduler.getTraversalEngine();
|
||||||
}
|
}
|
||||||
else if (my_walker instanceof ReadWalker) {
|
else if (my_walker instanceof ReadWalker) {
|
||||||
if (argCollection.referenceFile == null)
|
if (argCollection.referenceFile == null)
|
||||||
Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
|
Utils.scareUser(String.format("Locus-based traversals require a reference file but none was given"));
|
||||||
microScheduler = MicroScheduler.create(my_walker, argCollection.samFiles, argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
microScheduler = MicroScheduler.create(my_walker, new Reads(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||||
engine = microScheduler.getTraversalEngine();
|
engine = microScheduler.getTraversalEngine();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -209,26 +211,14 @@ public class GenomeAnalysisEngine {
|
||||||
if (argCollection.intervals != null) {
|
if (argCollection.intervals != null) {
|
||||||
engine.setLocation(setupIntervalRegion());
|
engine.setLocation(setupIntervalRegion());
|
||||||
}
|
}
|
||||||
// hmm...
|
|
||||||
if (argCollection.maximumReadSorts != null) {
|
|
||||||
engine.setSortOnFly(Integer.parseInt(argCollection.maximumReadSorts));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (argCollection.downsampleFraction != null) {
|
engine.setReadFilters(new Reads(argCollection));
|
||||||
engine.setDownsampleByFraction(Double.parseDouble(argCollection.downsampleFraction));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (argCollection.downsampleCoverage != null) {
|
|
||||||
engine.setDownsampleByCoverage(Integer.parseInt(argCollection.downsampleCoverage));
|
|
||||||
}
|
|
||||||
|
|
||||||
engine.setSafetyChecking(!argCollection.unsafe);
|
|
||||||
engine.setThreadedIO(argCollection.enabledThreadedIO);
|
engine.setThreadedIO(argCollection.enabledThreadedIO);
|
||||||
engine.setWalkOverAllSites(argCollection.walkAllLoci);
|
engine.setWalkOverAllSites(argCollection.walkAllLoci);
|
||||||
engine.initialize();
|
engine.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* setup the interval regions, from either the interval file of the genome region string
|
* setup the interval regions, from either the interval file of the genome region string
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.List;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 14, 2009
|
||||||
|
* Time: 4:06:26 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A data structure containing information about the reads data sources as well as
|
||||||
|
* information about how they should be downsampled, sorted, and filtered.
|
||||||
|
*/
|
||||||
|
public class Reads {
|
||||||
|
private List<File> readsFiles = null;
|
||||||
|
|
||||||
|
private Double downsamplingFraction = null;
|
||||||
|
private Integer downsampleToCoverage = null;
|
||||||
|
private Integer maxOnFlySorts = null;
|
||||||
|
private Boolean beSafe = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a list of the files acting as sources of reads.
|
||||||
|
* @return A list of files storing reads data.
|
||||||
|
*/
|
||||||
|
public List<File> getReadsFiles() {
|
||||||
|
return readsFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the fraction of reads to downsample.
|
||||||
|
* @return Downsample fraction.
|
||||||
|
*/
|
||||||
|
public Double getDownsamplingFraction() {
|
||||||
|
return downsamplingFraction;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Downsample each locus to the specified coverage.
|
||||||
|
* @return Coverage to which to downsample.
|
||||||
|
*/
|
||||||
|
public Integer getDownsampleToCoverage() {
|
||||||
|
return downsampleToCoverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the maximum number of supported on-the-fly sorts.
|
||||||
|
* @return Max number of on-the-fly sorts.
|
||||||
|
*/
|
||||||
|
public Integer getMaxOnTheFlySorts() {
|
||||||
|
return maxOnFlySorts;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether to 'verify' the reads as we pass through them.
|
||||||
|
* @return Whether to verify the reads.
|
||||||
|
*/
|
||||||
|
public Boolean getSafetyChecking() {
|
||||||
|
return beSafe;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple constructor for unit testing.
|
||||||
|
* @param readsFiles List of reads files to open.
|
||||||
|
*/
|
||||||
|
public Reads( List<File> readsFiles ) {
|
||||||
|
this.readsFiles = readsFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract the command-line arguments having to do with reads input
|
||||||
|
* files and store them in an easy-to-work-with package. Constructor
|
||||||
|
* is package protected.
|
||||||
|
* @param arguments GATK parsed command-line arguments.
|
||||||
|
*/
|
||||||
|
Reads( GATKArgumentCollection arguments ) {
|
||||||
|
this.readsFiles = arguments.samFiles;
|
||||||
|
if (arguments.downsampleFraction != null) downsamplingFraction = arguments.downsampleFraction;
|
||||||
|
if (arguments.downsampleCoverage != null) downsampleToCoverage = arguments.downsampleCoverage;
|
||||||
|
if (arguments.maximumReadSorts != null) maxOnFlySorts = arguments.maximumReadSorts;
|
||||||
|
beSafe = !arguments.unsafe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -15,8 +15,11 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.By;
|
||||||
import org.broadinstitute.sting.utils.JVMUtils;
|
import org.broadinstitute.sting.utils.JVMUtils;
|
||||||
import org.broadinstitute.sting.utils.PathUtils;
|
import org.broadinstitute.sting.utils.PathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -95,10 +98,28 @@ public class WalkerManager {
|
||||||
return (Walker) walker.newInstance();
|
return (Walker) walker.newInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the walker class given a walker name.
|
||||||
|
* @param walkerName Name of the walker.
|
||||||
|
* @return Class representing the walker.
|
||||||
|
*/
|
||||||
public Class getWalkerClassByName(String walkerName) {
|
public Class getWalkerClassByName(String walkerName) {
|
||||||
return walkers.get(walkerName);
|
return walkers.get(walkerName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the data source for the provided walker.
|
||||||
|
* @param walker The walker.
|
||||||
|
* @return Which type of data source to traverse over...reads or reference?
|
||||||
|
*/
|
||||||
|
public static DataSource getWalkerDataSource(Walker walker) {
|
||||||
|
Class<? extends Walker> walkerClass = walker.getClass();
|
||||||
|
By byDataSource = walkerClass.getAnnotation(By.class);
|
||||||
|
if( byDataSource == null )
|
||||||
|
throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName());
|
||||||
|
return byDataSource.value();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load classes internal to the classpath from an arbitrary location.
|
* Load classes internal to the classpath from an arbitrary location.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,9 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
import edu.mit.broad.picard.filter.FilteringIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
|
|
@ -30,24 +22,25 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
* A LocusContextQueue over which the user can iterate.
|
* A LocusContextQueue over which the user can iterate.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class IterableLocusContextQueue implements LocusContextQueue, LocusIterator {
|
public class IterableLocusContextQueue extends LocusContextQueue implements LocusIterator {
|
||||||
private Shard shard;
|
|
||||||
private LocusContextIterator loci;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* What's the context for the last locus accessed?
|
* What's the context for the last locus accessed?
|
||||||
* @param provider
|
* @param provider
|
||||||
*/
|
*/
|
||||||
private LocusContext nextLocusContext = null;
|
private LocusContext prefetched = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Has this prefetch been consumed? If this flag is set,
|
||||||
|
* the prefetch will skip to the next argument in the system.
|
||||||
|
*/
|
||||||
|
private boolean prefetchConsumed = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new queue of locus contexts.
|
* Create a new queue of locus contexts.
|
||||||
* @param provider
|
* @param provider
|
||||||
*/
|
*/
|
||||||
public IterableLocusContextQueue(ShardDataProvider provider) {
|
public IterableLocusContextQueue(ShardDataProvider provider) {
|
||||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
super( provider );
|
||||||
this.loci = new LocusContextIteratorByHanger(reads);
|
|
||||||
this.shard = provider.getShard();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -55,7 +48,8 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
||||||
* @return True if another locus present in this iterator. Otherwise, false.
|
* @return True if another locus present in this iterator. Otherwise, false.
|
||||||
*/
|
*/
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return loci.hasNext();
|
prefetchLocusContext();
|
||||||
|
return prefetched != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -63,12 +57,36 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
||||||
* @return Next element in the queue.
|
* @return Next element in the queue.
|
||||||
*/
|
*/
|
||||||
public GenomeLoc next() {
|
public GenomeLoc next() {
|
||||||
do {
|
prefetchLocusContext();
|
||||||
nextLocusContext = loci.next();
|
prefetchConsumed = true;
|
||||||
|
// Signal that the prefetcher needs to grab another entry off the queue.
|
||||||
|
return prefetched.getLocation();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the next locus context within the bounds of a member variable and store
|
||||||
|
* it in the prefetched member variable. When the prefetch is consumed, the 'consumer'
|
||||||
|
* should signal it as such by marking prefetchConsumed = true.
|
||||||
|
*/
|
||||||
|
private void prefetchLocusContext() {
|
||||||
|
if( !prefetchConsumed )
|
||||||
|
return;
|
||||||
|
|
||||||
|
prefetched = null;
|
||||||
|
prefetchConsumed = false;
|
||||||
|
|
||||||
|
// If another locus context bounded by this shard exists, find it.
|
||||||
|
boolean prefetchOutOfBounds = true;
|
||||||
|
while( hasNextLocusContext() && prefetchOutOfBounds ) {
|
||||||
|
prefetched = getNextLocusContext();
|
||||||
|
prefetchOutOfBounds = (prefetched.getLocation().isBefore(shard.getGenomeLoc()) ||
|
||||||
|
prefetched.getLocation().isPast(shard.getGenomeLoc()));
|
||||||
}
|
}
|
||||||
while( nextLocusContext.getLocation().isBefore(shard.getGenomeLoc()) );
|
|
||||||
|
// Can't find a valid prefetch? Set prefetch to null. If prefetched == null and
|
||||||
return nextLocusContext.getLocation();
|
// prefetchConsumed == false, the queue is out of entries.
|
||||||
|
if( prefetchOutOfBounds )
|
||||||
|
prefetched = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -83,9 +101,9 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public LocusContext peek() {
|
public LocusContext peek() {
|
||||||
if( nextLocusContext == null )
|
if( prefetched == null )
|
||||||
throw new NoSuchElementException("No more elements remaining in queue");
|
throw new NoSuchElementException("No more elements remaining in queue");
|
||||||
return nextLocusContext;
|
return prefetched;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -93,10 +111,8 @@ public class IterableLocusContextQueue implements LocusContextQueue, LocusIterat
|
||||||
* @param seekPoint
|
* @param seekPoint
|
||||||
*/
|
*/
|
||||||
public LocusContextQueue seek( GenomeLoc seekPoint ) {
|
public LocusContextQueue seek( GenomeLoc seekPoint ) {
|
||||||
if( nextLocusContext == null || !seekPoint.equals(nextLocusContext.getLocation()) ) {
|
if( prefetched == null || !seekPoint.equals(prefetched.getLocation()) )
|
||||||
nextLocusContext = null;
|
|
||||||
throw new IllegalArgumentException("IterableLocusContextQueue doesn't support seeking and iterator is in the wrong position.");
|
throw new IllegalArgumentException("IterableLocusContextQueue doesn't support seeking and iterator is in the wrong position.");
|
||||||
}
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,17 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.providers;
|
package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 13, 2009
|
* Date: May 13, 2009
|
||||||
|
|
@ -19,17 +29,50 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
* A queue of locus context entries.
|
* A queue of locus context entries.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public interface LocusContextQueue {
|
public abstract class LocusContextQueue {
|
||||||
|
protected Shard shard;
|
||||||
|
|
||||||
|
private Reads sourceInfo;
|
||||||
|
private LocusContextIterator loci;
|
||||||
|
|
||||||
|
public LocusContextQueue(ShardDataProvider provider) {
|
||||||
|
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
||||||
|
this.loci = new LocusContextIteratorByHanger(reads);
|
||||||
|
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
||||||
|
this.shard = provider.getShard();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the locus context at the given position.
|
* Get the locus context at the given position.
|
||||||
* @return Locus context, or null if no locus context exists at this position.
|
* @return Locus context, or null if no locus context exists at this position.
|
||||||
*/
|
*/
|
||||||
LocusContext peek();
|
public abstract LocusContext peek();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Seek to the given point the queue of locus contexts.
|
* Seek to the given point the queue of locus contexts.
|
||||||
* @param target Target base pair to which to seek. Must be a single base pair.
|
* @param target Target base pair to which to seek. Must be a single base pair.
|
||||||
* @return an instance of itself for parameter chaining.
|
* @return an instance of itself for parameter chaining.
|
||||||
*/
|
*/
|
||||||
public LocusContextQueue seek(GenomeLoc target);
|
public abstract LocusContextQueue seek(GenomeLoc target);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the next locus context, applying filtering as necessary.
|
||||||
|
* @return Locus context to work with.
|
||||||
|
*/
|
||||||
|
protected LocusContext getNextLocusContext() {
|
||||||
|
LocusContext next = loci.next();
|
||||||
|
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||||
|
next.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hasNext()-style iterator for base iterator.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected boolean hasNextLocusContext() {
|
||||||
|
return loci.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.dataSources.providers;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
import org.broadinstitute.sting.gatk.iterators.LocusContextIteratorByHanger;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
@ -31,10 +32,7 @@ import edu.mit.broad.picard.filter.FilteringIterator;
|
||||||
* implementation of java.util.Queue interface.
|
* implementation of java.util.Queue interface.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class SeekableLocusContextQueue implements LocusContextQueue {
|
public class SeekableLocusContextQueue extends LocusContextQueue {
|
||||||
private Shard shard;
|
|
||||||
private LocusContextIterator loci;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the position to which the last seek was requested.
|
* Gets the position to which the last seek was requested.
|
||||||
*/
|
*/
|
||||||
|
|
@ -53,15 +51,13 @@ public class SeekableLocusContextQueue implements LocusContextQueue {
|
||||||
* @param provider
|
* @param provider
|
||||||
*/
|
*/
|
||||||
public SeekableLocusContextQueue(ShardDataProvider provider) {
|
public SeekableLocusContextQueue(ShardDataProvider provider) {
|
||||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new TraversalEngine.locusStreamFilterFunc());
|
super(provider);
|
||||||
this.loci = new LocusContextIteratorByHanger(reads);
|
|
||||||
this.shard = provider.getShard();
|
|
||||||
|
|
||||||
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
// Seed the state tracking members with the first possible seek position and the first possible locus context.
|
||||||
seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart());
|
seekPoint = new GenomeLoc(shard.getGenomeLoc().getContigIndex(),shard.getGenomeLoc().getStart());
|
||||||
|
|
||||||
if( loci.hasNext() )
|
if( hasNextLocusContext() )
|
||||||
nextLocusContext = loci.next();
|
nextLocusContext = getNextLocusContext();
|
||||||
else
|
else
|
||||||
nextLocusContext = this.createEmptyLocusContext(seekPoint);
|
nextLocusContext = this.createEmptyLocusContext(seekPoint);
|
||||||
}
|
}
|
||||||
|
|
@ -94,15 +90,15 @@ public class SeekableLocusContextQueue implements LocusContextQueue {
|
||||||
seekPoint = (GenomeLoc)target.clone();
|
seekPoint = (GenomeLoc)target.clone();
|
||||||
|
|
||||||
// Search for the next locus context following the target positions.
|
// Search for the next locus context following the target positions.
|
||||||
while (nextLocusContext.getLocation().isBefore(target) && loci.hasNext() ) {
|
while (nextLocusContext.getLocation().isBefore(target) && hasNextLocusContext() ) {
|
||||||
logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(),
|
logger.debug(String.format(" current locus is %s vs %s => %d", nextLocusContext.getLocation(),
|
||||||
target,
|
target,
|
||||||
nextLocusContext.getLocation().compareTo(target)));
|
nextLocusContext.getLocation().compareTo(target)));
|
||||||
nextLocusContext = loci.next();
|
nextLocusContext = getNextLocusContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Couldn't find a next? Force the nextLocusContext to null.
|
// Couldn't find a next? Force the nextLocusContext to null.
|
||||||
if( nextLocusContext.getLocation().isBefore(target) && !loci.hasNext() )
|
if( nextLocusContext.getLocation().isBefore(target) && !hasNextLocusContext() )
|
||||||
nextLocusContext = createEmptyLocusContext( seekPoint );
|
nextLocusContext = createEmptyLocusContext( seekPoint );
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,9 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
|
@ -32,6 +35,11 @@ import java.util.List;
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
*/
|
*/
|
||||||
public class SAMDataSource implements SimpleDataSource {
|
public class SAMDataSource implements SimpleDataSource {
|
||||||
|
/**
|
||||||
|
* Backing support for reads.
|
||||||
|
*/
|
||||||
|
private Reads reads = null;
|
||||||
|
|
||||||
/** our SAM data files */
|
/** our SAM data files */
|
||||||
private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
private final SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
||||||
|
|
||||||
|
|
@ -65,25 +73,18 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
/**
|
/**
|
||||||
* constructor, given sam files
|
* constructor, given sam files
|
||||||
*
|
*
|
||||||
* @param samFiles the list of sam files
|
* @param reads the list of sam files
|
||||||
*/
|
*/
|
||||||
public SAMDataSource(List<?> samFiles) throws SimpleDataSourceLoadException {
|
public SAMDataSource(Reads reads) throws SimpleDataSourceLoadException {
|
||||||
|
this.reads = reads;
|
||||||
|
|
||||||
// check the length
|
// check the length
|
||||||
if (samFiles.size() < 1) {
|
if (reads.getReadsFiles().size() < 1) {
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0");
|
throw new SimpleDataSourceLoadException("SAMDataSource: you must provide a list of length greater then 0");
|
||||||
}
|
}
|
||||||
for (Object fileName : samFiles) {
|
for (File smFile : reads.getReadsFiles()) {
|
||||||
File smFile;
|
|
||||||
if (samFiles.get(0) instanceof String) {
|
|
||||||
smFile = new File((String) samFiles.get(0));
|
|
||||||
} else if (samFiles.get(0) instanceof File) {
|
|
||||||
smFile = (File) fileName;
|
|
||||||
} else {
|
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: unknown samFile list type, must be String or File");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!smFile.canRead()) {
|
if (!smFile.canRead()) {
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + fileName);
|
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||||
}
|
}
|
||||||
samFileList.add(smFile);
|
samFileList.add(smFile);
|
||||||
|
|
||||||
|
|
@ -120,7 +121,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @param location the genome location to extract data for
|
* @param location the genome location to extract data for
|
||||||
* @return an iterator for that region
|
* @return an iterator for that region
|
||||||
*/
|
*/
|
||||||
public MergingSamRecordIterator2 seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
||||||
|
|
||||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||||
SamFileHeaderMerger headerMerger = createHeaderMerger();
|
SamFileHeaderMerger headerMerger = createHeaderMerger();
|
||||||
|
|
@ -132,7 +133,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
|
iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);
|
||||||
|
|
||||||
// return the iterator
|
// return the iterator
|
||||||
return iter;
|
return StingSAMIteratorAdapter.adapt( reads, iter );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -144,13 +145,26 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return an iterator for that region
|
* @return an iterator for that region
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator seek(Shard shard) throws SimpleDataSourceLoadException {
|
public StingSAMIterator seek(Shard shard) throws SimpleDataSourceLoadException {
|
||||||
|
StingSAMIterator iterator = null;
|
||||||
if (shard.getShardType() == Shard.ShardType.READ) {
|
if (shard.getShardType() == Shard.ShardType.READ) {
|
||||||
return seekRead((ReadShard) shard);
|
iterator = seekRead((ReadShard) shard);
|
||||||
|
iterator = TraversalEngine.applyDecoratingIterators(true,
|
||||||
|
iterator,
|
||||||
|
reads.getDownsamplingFraction(),
|
||||||
|
reads.getMaxOnTheFlySorts(),
|
||||||
|
reads.getSafetyChecking());
|
||||||
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||||
return seekLocus(shard.getGenomeLoc());
|
iterator = seekLocus(shard.getGenomeLoc());
|
||||||
|
iterator = TraversalEngine.applyDecoratingIterators(false,
|
||||||
|
iterator,
|
||||||
|
reads.getDownsamplingFraction(),
|
||||||
|
reads.getMaxOnTheFlySorts(),
|
||||||
|
reads.getSafetyChecking());
|
||||||
} else {
|
} else {
|
||||||
throw new StingException("seek: Unknown shard type");
|
throw new StingException("seek: Unknown shard type");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return iterator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -204,7 +218,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
if (bound == null) {
|
if (bound == null) {
|
||||||
shard.signalDone();
|
shard.signalDone();
|
||||||
bound = new BoundedReadIterator(iter, 0);
|
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), 0);
|
||||||
}
|
}
|
||||||
return bound;
|
return bound;
|
||||||
}
|
}
|
||||||
|
|
@ -258,7 +272,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
// we're good, increment our read cout
|
// we're good, increment our read cout
|
||||||
this.readsTaken += readCount;
|
this.readsTaken += readCount;
|
||||||
return new BoundedReadIterator(iter, readCount);
|
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,7 +290,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
if (lastReadPos == null) {
|
if (lastReadPos == null) {
|
||||||
lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
|
lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
|
||||||
iter.queryContained(lastReadPos.getContig(), 1, -1);
|
iter.queryContained(lastReadPos.getContig(), 1, -1);
|
||||||
bound = new BoundedReadIterator(iter, readCount);
|
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
|
||||||
this.readsTaken = readCount;
|
this.readsTaken = readCount;
|
||||||
}
|
}
|
||||||
// we're not at the beginning, not at the end, so we move forward with our ghastly plan...
|
// we're not at the beginning, not at the end, so we move forward with our ghastly plan...
|
||||||
|
|
@ -324,7 +338,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
SamFileHeaderMerger mg = createHeaderMerger();
|
SamFileHeaderMerger mg = createHeaderMerger();
|
||||||
iter = new MergingSamRecordIterator2(mg);
|
iter = new MergingSamRecordIterator2(mg);
|
||||||
iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
|
iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
|
||||||
return new BoundedReadIterator(iter,readCount);
|
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter),readCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -349,7 +363,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
|
throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
|
||||||
//return null;
|
//return null;
|
||||||
}
|
}
|
||||||
bound = new BoundedReadIterator(iter, readCount);
|
bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.OutputTracker;
|
import org.broadinstitute.sting.gatk.OutputTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
@ -58,7 +59,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Reduce
|
||||||
* @param refFile Reference for driving the traversal.
|
* @param refFile Reference for driving the traversal.
|
||||||
* @param nThreadsToUse maximum number of threads to use to do the work
|
* @param nThreadsToUse maximum number of threads to use to do the work
|
||||||
*/
|
*/
|
||||||
protected HierarchicalMicroScheduler( Walker walker, List<File> reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||||
super( walker, reads, refFile, rods );
|
super( walker, reads, refFile, rods );
|
||||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||||
if( !(traversalEngine instanceof TraverseLociByReference) )
|
if( !(traversalEngine instanceof TraverseLociByReference) )
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -20,7 +21,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
* @param reads Reads file(s) to process.
|
* @param reads Reads file(s) to process.
|
||||||
* @param refFile Reference for driving the traversal.
|
* @param refFile Reference for driving the traversal.
|
||||||
*/
|
*/
|
||||||
protected LinearMicroScheduler( Walker walker, List<File> reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
protected LinearMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||||
super(walker, reads, refFile, rods);
|
super(walker, reads, refFile, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
@ -51,7 +52,7 @@ public abstract class MicroScheduler {
|
||||||
* @param nThreadsToUse Number of threads to utilize.
|
* @param nThreadsToUse Number of threads to utilize.
|
||||||
* @return The best-fit microscheduler.
|
* @return The best-fit microscheduler.
|
||||||
*/
|
*/
|
||||||
public static MicroScheduler create( Walker walker, List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
public static MicroScheduler create( Walker walker, Reads reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
||||||
if( walker instanceof TreeReducible && nThreadsToUse > 1 ) {
|
if( walker instanceof TreeReducible && nThreadsToUse > 1 ) {
|
||||||
logger.info("Creating hierarchical microscheduler");
|
logger.info("Creating hierarchical microscheduler");
|
||||||
return new HierarchicalMicroScheduler( walker, reads, ref, rods, nThreadsToUse );
|
return new HierarchicalMicroScheduler( walker, reads, ref, rods, nThreadsToUse );
|
||||||
|
|
@ -67,11 +68,11 @@ public abstract class MicroScheduler {
|
||||||
* @param reads The reads.
|
* @param reads The reads.
|
||||||
* @param refFile File pointer to the reference.
|
* @param refFile File pointer to the reference.
|
||||||
*/
|
*/
|
||||||
protected MicroScheduler( Walker walker, List<File> reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
protected MicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
||||||
if (walker instanceof ReadWalker) {
|
if (walker instanceof ReadWalker) {
|
||||||
traversalEngine = new TraverseReads(reads, refFile, rods);
|
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
||||||
} else {
|
} else {
|
||||||
traversalEngine = new TraverseLociByReference(reads, refFile, rods);
|
traversalEngine = new TraverseLociByReference(reads.getReadsFiles(), refFile, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.reads = getReadsDataSource( reads );
|
this.reads = getReadsDataSource( reads );
|
||||||
|
|
@ -132,16 +133,8 @@ public abstract class MicroScheduler {
|
||||||
* Gets a data source for the given set of reads.
|
* Gets a data source for the given set of reads.
|
||||||
* @return A data source for the given set of reads.
|
* @return A data source for the given set of reads.
|
||||||
*/
|
*/
|
||||||
private SAMDataSource getReadsDataSource( List<File> reads ) {
|
private SAMDataSource getReadsDataSource( Reads reads ) {
|
||||||
List<File> unpackedReads = null;
|
SAMDataSource dataSource = new SAMDataSource( reads );
|
||||||
try {
|
|
||||||
unpackedReads = TraversalEngine.unpackReads(reads);
|
|
||||||
}
|
|
||||||
catch( FileNotFoundException ex ) {
|
|
||||||
throw new StingException( "Cannot unpack list of reads files", ex );
|
|
||||||
}
|
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource( unpackedReads );
|
|
||||||
|
|
||||||
// Side effect: initialize the traversal engine with reads data.
|
// Side effect: initialize the traversal engine with reads data.
|
||||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -68,6 +70,14 @@ public class BoundedReadIterator implements StingSAMIterator {
|
||||||
this.doNotUseThatUnmappedReadPile = useThem;
|
this.doNotUseThatUnmappedReadPile = useThem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves information about reads sources.
|
||||||
|
* @return Info about the sources of reads.
|
||||||
|
*/
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
return iterator.getSourceInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public SAMFileHeader getHeader() {
|
public SAMFileHeader getHeader() {
|
||||||
// todo: this is bad, we need an iterface out there for samrecords that supports getting the header,
|
// todo: this is bad, we need an iterface out there for samrecords that supports getting the header,
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import net.sf.samtools.SAMRecord;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
|
|
||||||
public class DownsampleIterator implements StingSAMIterator {
|
public class DownsampleIterator implements StingSAMIterator {
|
||||||
|
|
||||||
|
|
@ -20,6 +22,15 @@ public class DownsampleIterator implements StingSAMIterator {
|
||||||
next = getNextRecord();
|
next = getNextRecord();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves information about reads sources.
|
||||||
|
* @return Info about the sources of reads.
|
||||||
|
*/
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
return it.getSourceInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return next != null;
|
return next != null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,10 @@ import edu.mit.broad.picard.sam.ReservedTagConstants;
|
||||||
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
||||||
import edu.mit.broad.picard.util.PeekableIterator;
|
import edu.mit.broad.picard.util.PeekableIterator;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
@ -28,7 +31,7 @@ import java.util.PriorityQueue;
|
||||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||||
* the requested output format is unsorted, in which case any combination is valid.
|
* the requested output format is unsorted, in which case any combination is valid.
|
||||||
*/
|
*/
|
||||||
public class MergingSamRecordIterator2 implements StingSAMIterator {
|
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
||||||
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
||||||
protected final SamFileHeaderMerger samHeaderMerger;
|
protected final SamFileHeaderMerger samHeaderMerger;
|
||||||
protected final SAMFileHeader.SortOrder sortOrder;
|
protected final SAMFileHeader.SortOrder sortOrder;
|
||||||
|
|
@ -272,6 +275,7 @@ public class MergingSamRecordIterator2 implements StingSAMIterator {
|
||||||
|
|
||||||
// Should replace picard class with the same name
|
// Should replace picard class with the same name
|
||||||
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
|
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
|
||||||
|
private Reads sourceInfo;
|
||||||
private final Comparator<SAMRecord> comparator;
|
private final Comparator<SAMRecord> comparator;
|
||||||
private final SAMFileReader reader;
|
private final SAMFileReader reader;
|
||||||
|
|
||||||
|
|
@ -295,6 +299,12 @@ class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements
|
||||||
this.comparator = comparator;
|
this.comparator = comparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
if( sourceInfo == null )
|
||||||
|
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||||
|
return sourceInfo;
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns the reader from which this iterator was constructed. */
|
/** Returns the reader from which this iterator was constructed. */
|
||||||
public SAMFileReader getReader() {
|
public SAMFileReader getReader() {
|
||||||
return reader;
|
return reader;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -11,11 +13,11 @@ import java.util.Iterator;
|
||||||
// TODO: Deprecate?
|
// TODO: Deprecate?
|
||||||
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
|
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
|
||||||
public class SortSamIterator implements StingSAMIterator {
|
public class SortSamIterator implements StingSAMIterator {
|
||||||
|
private Reads sourceInfo;
|
||||||
|
private Iterator<ComparableSAMRecord> it;
|
||||||
|
|
||||||
Iterator<ComparableSAMRecord> it;
|
public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) {
|
||||||
|
sourceInfo = unsortedIter.getSourceInfo();
|
||||||
public SortSamIterator(Iterator<SAMRecord> unsortedIter, int maxSorts) {
|
|
||||||
|
|
||||||
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||||
while (unsortedIter.hasNext()) {
|
while (unsortedIter.hasNext()) {
|
||||||
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||||
|
|
@ -27,6 +29,16 @@ public class SortSamIterator implements StingSAMIterator {
|
||||||
it = list.iterator();
|
it = list.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves information about reads sources.
|
||||||
|
* @return Info about the sources of reads.
|
||||||
|
*/
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
if( sourceInfo == null )
|
||||||
|
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||||
|
return sourceInfo;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean hasNext() { return it.hasNext(); }
|
public boolean hasNext() { return it.hasNext(); }
|
||||||
public SAMRecord next() { return it.next().getRecord(); }
|
public SAMRecord next() { return it.next().getRecord(); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -28,4 +29,10 @@ import net.sf.samtools.util.CloseableIterator;
|
||||||
* This is the standard interface for all iterators in the Sting package that iterate over SAMRecords
|
* This is the standard interface for all iterators in the Sting package that iterate over SAMRecords
|
||||||
*/
|
*/
|
||||||
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
||||||
|
/**
|
||||||
|
* Gets source information for the reads. Contains information about the original reads
|
||||||
|
* files, plus information about downsampling, etc.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Reads getSourceInfo();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,9 @@ import net.sf.samtools.util.CloseableIterator;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -31,14 +34,14 @@ import java.util.Iterator;
|
||||||
* <p/>
|
* <p/>
|
||||||
* This class adapts other SAMRecord iterators to the StingSAMIterator
|
* This class adapts other SAMRecord iterators to the StingSAMIterator
|
||||||
*/
|
*/
|
||||||
public class StingSAMIteratorAdapter {
|
public class StingSAMIteratorAdapter {
|
||||||
|
|
||||||
public static StingSAMIterator adapt(Iterator<SAMRecord> iter) {
|
public static StingSAMIterator adapt(Reads sourceInfo, Iterator<SAMRecord> iter) {
|
||||||
return new PrivateStringSAMIterator(iter);
|
return new PrivateStringSAMIterator(sourceInfo, iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static StingSAMIterator adapt(CloseableIterator<SAMRecord> iter) {
|
public static StingSAMIterator adapt(Reads sourceInfo, CloseableIterator<SAMRecord> iter) {
|
||||||
return new PrivateStringSAMCloseableIterator(iter);
|
return new PrivateStringSAMCloseableIterator(sourceInfo, iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -49,12 +52,20 @@ public class StingSAMIteratorAdapter {
|
||||||
* methods that implement the iterable<> interface and the close() method from CloseableIterator
|
* methods that implement the iterable<> interface and the close() method from CloseableIterator
|
||||||
*/
|
*/
|
||||||
class PrivateStringSAMIterator implements StingSAMIterator {
|
class PrivateStringSAMIterator implements StingSAMIterator {
|
||||||
|
private Reads sourceInfo = null;
|
||||||
private Iterator<SAMRecord> iter = null;
|
private Iterator<SAMRecord> iter = null;
|
||||||
|
|
||||||
PrivateStringSAMIterator(Iterator<SAMRecord> iter) {
|
PrivateStringSAMIterator(Reads sourceInfo, Iterator<SAMRecord> iter) {
|
||||||
|
this.sourceInfo = sourceInfo;
|
||||||
this.iter = iter;
|
this.iter = iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
if( sourceInfo == null )
|
||||||
|
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||||
|
return sourceInfo;
|
||||||
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
// do nothing, we can't close the iterator anyway.
|
// do nothing, we can't close the iterator anyway.
|
||||||
}
|
}
|
||||||
|
|
@ -82,12 +93,20 @@ class PrivateStringSAMIterator implements StingSAMIterator {
|
||||||
* methods that implement the iterable<> interface.
|
* methods that implement the iterable<> interface.
|
||||||
*/
|
*/
|
||||||
class PrivateStringSAMCloseableIterator implements StingSAMIterator {
|
class PrivateStringSAMCloseableIterator implements StingSAMIterator {
|
||||||
|
private Reads sourceInfo = null;
|
||||||
private CloseableIterator<SAMRecord> iter = null;
|
private CloseableIterator<SAMRecord> iter = null;
|
||||||
|
|
||||||
PrivateStringSAMCloseableIterator(CloseableIterator<SAMRecord> iter) {
|
PrivateStringSAMCloseableIterator(Reads sourceInfo, CloseableIterator<SAMRecord> iter) {
|
||||||
|
this.sourceInfo = sourceInfo;
|
||||||
this.iter = iter;
|
this.iter = iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
if( sourceInfo == null )
|
||||||
|
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||||
|
return sourceInfo;
|
||||||
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
iter.close();
|
iter.close();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
@ -23,6 +24,15 @@ public class VerifyingSamIterator implements StingSAMIterator {
|
||||||
this.it = it;
|
this.it = it;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves information about reads sources.
|
||||||
|
* @return Info about the sources of reads.
|
||||||
|
*/
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
return it.getSourceInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean hasNext() { return this.it.hasNext(); }
|
public boolean hasNext() { return this.it.hasNext(); }
|
||||||
public SAMRecord next() {
|
public SAMRecord next() {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
|
||||||
|
|
@ -131,11 +132,11 @@ public abstract class TraversalEngine {
|
||||||
this.walkOverAllSites = walkOverAllSites;
|
this.walkOverAllSites = walkOverAllSites;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDebugging(final boolean d) {
|
private void setDebugging(final boolean d) {
|
||||||
DEBUGGING = d;
|
DEBUGGING = d;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSafetyChecking(final boolean beSafeP) {
|
private void setSafetyChecking(final boolean beSafeP) {
|
||||||
if (!beSafeP)
|
if (!beSafeP)
|
||||||
logger.warn("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...");
|
logger.warn("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...");
|
||||||
this.beSafeP = beSafeP;
|
this.beSafeP = beSafeP;
|
||||||
|
|
@ -147,26 +148,39 @@ public abstract class TraversalEngine {
|
||||||
this.FILTER_UNSORTED_READS = filterUnsorted;
|
this.FILTER_UNSORTED_READS = filterUnsorted;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSortOnFly(final int maxReadsToSort) {
|
private void setSortOnFly(final int maxReadsToSort) {
|
||||||
logger.info("Sorting read file on the fly: max reads allowed is " + maxReadsToSort);
|
logger.info("Sorting read file on the fly: max reads allowed is " + maxReadsToSort);
|
||||||
SORT_ON_FLY = true;
|
SORT_ON_FLY = true;
|
||||||
maxOnFlySorts = maxReadsToSort;
|
maxOnFlySorts = maxReadsToSort;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSortOnFly() { setSortOnFly(100000); }
|
private void setSortOnFly() { setSortOnFly(100000); }
|
||||||
|
|
||||||
public void setDownsampleByFraction(final double fraction) {
|
private void setDownsampleByFraction(final double fraction) {
|
||||||
logger.info("Downsampling to approximately " + (fraction * 100.0) + "% of filtered reads");
|
logger.info("Downsampling to approximately " + (fraction * 100.0) + "% of filtered reads");
|
||||||
DOWNSAMPLE_BY_FRACTION = true;
|
DOWNSAMPLE_BY_FRACTION = true;
|
||||||
downsamplingFraction = fraction;
|
downsamplingFraction = fraction;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDownsampleByCoverage(final int coverage) {
|
private void setDownsampleByCoverage(final int coverage) {
|
||||||
logger.info("Downsampling to coverage " + coverage);
|
logger.info("Downsampling to coverage " + coverage);
|
||||||
DOWNSAMPLE_BY_COVERAGE = true;
|
DOWNSAMPLE_BY_COVERAGE = true;
|
||||||
downsamplingCoverage = coverage;
|
downsamplingCoverage = coverage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets up read filtering performed by the traversal engine.
|
||||||
|
* @param reads Read filters to instantiate as the traversal engine walks over the data.
|
||||||
|
* @deprecated Should only be used by old-style traversals.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public void setReadFilters( Reads reads) {
|
||||||
|
if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction());
|
||||||
|
if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage());
|
||||||
|
if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts());
|
||||||
|
if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking());
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// functions for dealing locations (areas of the genome we're traversing over)
|
// functions for dealing locations (areas of the genome we're traversing over)
|
||||||
|
|
@ -318,36 +332,9 @@ public abstract class TraversalEngine {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Unpack the files to be processed, given a list of files. That list of files can
|
|
||||||
* itself contain lists of other files to be read.
|
|
||||||
* @param inputFiles
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static List<File> unpackReads( List<File> inputFiles ) throws FileNotFoundException {
|
|
||||||
List<File> unpackedReads = new ArrayList<File>();
|
|
||||||
for( File inputFile: inputFiles ) {
|
|
||||||
if( inputFile.getName().endsWith(".list") ) {
|
|
||||||
for( String fileName : new xReadLines(inputFile) )
|
|
||||||
unpackedReads.add( new File(fileName) );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
unpackedReads.add( inputFile );
|
|
||||||
}
|
|
||||||
return unpackedReads;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected Iterator<SAMRecord> initializeReads() {
|
protected Iterator<SAMRecord> initializeReads() {
|
||||||
List<File> allReadsFiles = null;
|
if( readsFiles.size() == 1 )
|
||||||
try {
|
samReader = initializeSAMFile(readsFiles.get(0));
|
||||||
allReadsFiles = unpackReads( readsFiles );
|
|
||||||
}
|
|
||||||
catch( FileNotFoundException ex ) {
|
|
||||||
throw new RuntimeException("Unable to unpack reads", ex );
|
|
||||||
}
|
|
||||||
|
|
||||||
if( allReadsFiles.size() == 1 )
|
|
||||||
samReader = initializeSAMFile(allReadsFiles.get(0));
|
|
||||||
else
|
else
|
||||||
samReader = null;
|
samReader = null;
|
||||||
return wrapReadsIterator(getReadsIterator(samReader), true);
|
return wrapReadsIterator(getReadsIterator(samReader), true);
|
||||||
|
|
@ -358,16 +345,9 @@ public abstract class TraversalEngine {
|
||||||
if ( samReader == null && readsFiles.size() > 0 ) {
|
if ( samReader == null && readsFiles.size() > 0 ) {
|
||||||
SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
|
||||||
|
|
||||||
List<File> allReadsFiles = null;
|
|
||||||
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||||
try {
|
|
||||||
allReadsFiles = unpackReads( readsFiles );
|
|
||||||
}
|
|
||||||
catch(FileNotFoundException ex) {
|
|
||||||
throw new RuntimeException("Unable to unpack reads", ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( File readsFile: allReadsFiles ) {
|
for ( File readsFile: readsFiles ) {
|
||||||
SAMFileReader reader = initializeSAMFile(readsFile);
|
SAMFileReader reader = initializeSAMFile(readsFile);
|
||||||
readers.add(reader);
|
readers.add(reader);
|
||||||
}
|
}
|
||||||
|
|
@ -382,28 +362,52 @@ public abstract class TraversalEngine {
|
||||||
|
|
||||||
@Deprecated
|
@Deprecated
|
||||||
protected StingSAMIterator wrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
|
protected StingSAMIterator wrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
|
||||||
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(rawIterator);
|
// Reads sourceInfo is gone by this point in the traversal engine. Stub in a null and rely on the iterator to
|
||||||
wrappedIterator = applyDecoratingIterators(enableVerification, wrappedIterator);
|
// throw an exception if reads info isn't present.
|
||||||
|
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator);
|
||||||
|
wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator);
|
||||||
|
|
||||||
if (THREADED_IO) {
|
if (THREADED_IO) {
|
||||||
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
|
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
|
||||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
|
wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
|
||||||
}
|
}
|
||||||
|
|
||||||
return wrappedIterator;
|
return wrappedIterator;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StingSAMIterator applyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator) {
|
/**
|
||||||
|
* Repackage instance variables and call static method.
|
||||||
|
* TODO: This method's days are numbered.
|
||||||
|
* @param enableVerification
|
||||||
|
* @param wrappedIterator
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected StingSAMIterator applyDecoratingIterators( final boolean enableVerification, final StingSAMIterator wrappedIterator ) {
|
||||||
|
return applyDecoratingIterators(enableVerification,
|
||||||
|
wrappedIterator,
|
||||||
|
DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null,
|
||||||
|
SORT_ON_FLY ? maxOnFlySorts : null,
|
||||||
|
beSafeP);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WARNING: In TraversalEngine for backward compatibility ONLY. Reads are not used as the data source, only as parameters
|
||||||
|
* for validation.
|
||||||
|
*/
|
||||||
|
public static StingSAMIterator applyDecoratingIterators(boolean enableVerification,
|
||||||
|
StingSAMIterator wrappedIterator,
|
||||||
|
Double downsamplingFraction,
|
||||||
|
Integer maxOnFlySorts,
|
||||||
|
Boolean beSafeP) {
|
||||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||||
// as there is no reason to sort something that we will end of throwing away
|
// as there is no reason to sort something that we will end of throwing away
|
||||||
if (DOWNSAMPLE_BY_FRACTION)
|
if (downsamplingFraction != null)
|
||||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||||
|
|
||||||
if (SORT_ON_FLY)
|
if (maxOnFlySorts != null)
|
||||||
wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts);
|
wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts);
|
||||||
|
|
||||||
if (beSafeP && enableVerification)
|
if (beSafeP != null && beSafeP && enableVerification)
|
||||||
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
|
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
|
||||||
return wrappedIterator;
|
return wrappedIterator;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,15 @@ package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceLocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue;
|
import org.broadinstitute.sting.gatk.dataSources.providers.SeekableLocusContextQueue;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextQueue;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.providers.IterableLocusContextQueue;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -33,7 +37,6 @@ public class TraverseLociByReference extends TraversalEngine {
|
||||||
*/
|
*/
|
||||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||||
|
|
||||||
|
|
||||||
public TraverseLociByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
public TraverseLociByReference(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
super( reads, ref, rods );
|
super( reads, ref, rods );
|
||||||
}
|
}
|
||||||
|
|
@ -57,8 +60,23 @@ public class TraverseLociByReference extends TraversalEngine {
|
||||||
|
|
||||||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||||
|
|
||||||
LocusIterator locusIterator = new ReferenceLocusIterator( dataProvider );
|
LocusIterator locusIterator = null;
|
||||||
SeekableLocusContextQueue locusContextQueue = new SeekableLocusContextQueue( dataProvider );
|
LocusContextQueue locusContextQueue = null;
|
||||||
|
|
||||||
|
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||||
|
switch( dataSource ) {
|
||||||
|
case REFERENCE:
|
||||||
|
locusIterator = new ReferenceLocusIterator( dataProvider );
|
||||||
|
locusContextQueue = new SeekableLocusContextQueue( dataProvider );
|
||||||
|
break;
|
||||||
|
case READS:
|
||||||
|
IterableLocusContextQueue iterableQueue = new IterableLocusContextQueue( dataProvider );
|
||||||
|
locusIterator = iterableQueue;
|
||||||
|
locusContextQueue = iterableQueue;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
// We keep processing while the next reference location is within the interval
|
||||||
while( locusIterator.hasNext() ) {
|
while( locusIterator.hasNext() ) {
|
||||||
|
|
@ -72,9 +90,6 @@ public class TraverseLociByReference extends TraversalEngine {
|
||||||
LocusContext locus = locusContextQueue.seek( site ).peek();
|
LocusContext locus = locusContextQueue.seek( site ).peek();
|
||||||
char refBase = dataProvider.getReferenceBase( site );
|
char refBase = dataProvider.getReferenceBase( site );
|
||||||
|
|
||||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
|
||||||
locus.downsampleToCoverage(downsamplingCoverage);
|
|
||||||
|
|
||||||
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
|
||||||
if (keepMeP) {
|
if (keepMeP) {
|
||||||
M x = locusWalker.map(tracker, refBase, locus);
|
M x = locusWalker.map(tracker, refBase, locus);
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
|
@ -88,11 +87,8 @@ public class TraverseReads extends TraversalEngine {
|
||||||
|
|
||||||
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
||||||
|
|
||||||
// we allow a bunch of wrapping iterators for down sampling, threadingIO, etc.
|
|
||||||
StingSAMIterator it = applyDecoratingIterators(true, dataProvider.getReadIterator());
|
|
||||||
|
|
||||||
// while we still have more reads
|
// while we still have more reads
|
||||||
for (SAMRecord read : it) {
|
for (SAMRecord read : dataProvider.getReadIterator()) {
|
||||||
|
|
||||||
// our locus context
|
// our locus context
|
||||||
LocusContext locus = null;
|
LocusContext locus = null;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import java.lang.annotation.Documented;
|
||||||
|
import java.lang.annotation.Inherited;
|
||||||
|
import java.lang.annotation.Retention;
|
||||||
|
import java.lang.annotation.RetentionPolicy;
|
||||||
|
import java.lang.annotation.Target;
|
||||||
|
import java.lang.annotation.ElementType;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 14, 2009
|
||||||
|
* Time: 1:51:22 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allows the walker to indicate what type of data it wants to consume.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Documented
|
||||||
|
@Inherited
|
||||||
|
@Retention(RetentionPolicy.RUNTIME)
|
||||||
|
@Target(ElementType.TYPE)
|
||||||
|
public @interface By {
|
||||||
|
DataSource value();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
/**
|
||||||
|
* User: hanna
|
||||||
|
* Date: May 14, 2009
|
||||||
|
* Time: 2:12:33 PM
|
||||||
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||||
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||||
|
* All rights are reserved.
|
||||||
|
*
|
||||||
|
* Users acknowledge that this software is supplied without any warranty or support.
|
||||||
|
* The Broad Institute is not responsible for its use, misuse, or
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allow user to choose between a number of different data sources.
|
||||||
|
*/
|
||||||
|
public enum DataSource {
|
||||||
|
READS,
|
||||||
|
REFERENCE
|
||||||
|
}
|
||||||
|
|
@ -1,11 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: mdepristo
|
* User: mdepristo
|
||||||
|
|
@ -13,6 +10,7 @@ import java.util.List;
|
||||||
* Time: 2:52:28 PM
|
* Time: 2:52:28 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
|
@By(DataSource.READS)
|
||||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
* Time: 11:23:14 AM
|
* Time: 11:23:14 AM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
|
@By(DataSource.REFERENCE)
|
||||||
public class PrintLocusContextWalker extends LocusWalker<LocusContext, Integer> implements TreeReducible<Integer> {
|
public class PrintLocusContextWalker extends LocusWalker<LocusContext, Integer> implements TreeReducible<Integer> {
|
||||||
public LocusContext map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public LocusContext map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
out.printf( "In map: ref = %c, loc = %s, reads = %s%n", ref,
|
out.printf( "In map: ref = %c, loc = %s, reads = %s%n", ref,
|
||||||
|
|
|
||||||
|
|
@ -82,9 +82,9 @@ public class GATKArgumentCollectionTest extends BaseTest {
|
||||||
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
|
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
|
||||||
collect.enabledThreadedIO = true;
|
collect.enabledThreadedIO = true;
|
||||||
collect.unsafe = false;
|
collect.unsafe = false;
|
||||||
collect.maximumReadSorts = "maximumReadSorts".toLowerCase();
|
collect.maximumReadSorts = null;
|
||||||
collect.downsampleFraction = "downsampleFraction".toLowerCase();
|
collect.downsampleFraction = null;
|
||||||
collect.downsampleCoverage = "downsampleCoverage".toLowerCase();
|
collect.downsampleCoverage = null;
|
||||||
collect.intervals = "intervals".toLowerCase();
|
collect.intervals = "intervals".toLowerCase();
|
||||||
collect.walkAllLoci = true;
|
collect.walkAllLoci = true;
|
||||||
collect.disableThreading = false;
|
collect.disableThreading = false;
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,8 @@ public class IterableLocusContextQueueTest extends LocusContextQueueTemplate {
|
||||||
if(new GenomeLoc(read).containsP(locusContext.getLocation()))
|
if(new GenomeLoc(read).containsP(locusContext.getLocation()))
|
||||||
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Assert.assertFalse("Iterator is not bounded at boundaries of shard", iterableQueue.hasNext());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,10 @@ import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.LocusShard;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.File;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
@ -294,6 +296,11 @@ public abstract class LocusContextQueueTemplate extends BaseTest {
|
||||||
backingIterator = backingList.iterator();
|
backingIterator = backingList.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
// There are no sources for these reads.
|
||||||
|
return new Reads(new ArrayList<File>());
|
||||||
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return backingIterator.hasNext();
|
return backingIterator.hasNext();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
|
@ -45,7 +47,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class SAMBAMDataSourceTest extends BaseTest {
|
public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
|
|
||||||
private List<String> fl;
|
private List<File> fl;
|
||||||
private FastaSequenceFile2 seq;
|
private FastaSequenceFile2 seq;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -55,7 +57,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
*/
|
*/
|
||||||
@Before
|
@Before
|
||||||
public void doForEachTest() {
|
public void doForEachTest() {
|
||||||
fl = new ArrayList<String>();
|
fl = new ArrayList<File>();
|
||||||
|
|
||||||
// sequence
|
// sequence
|
||||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||||
|
|
@ -83,18 +85,18 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
// setup the data
|
// setup the data
|
||||||
fl.add(oneKGLocation + "/pilot3/sams/NA12892.bam");
|
fl.add(new File(oneKGLocation + "/pilot3/sams/NA12892.bam"));
|
||||||
|
Reads reads = new Reads(fl);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(fl);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
for (Shard sh : strat) {
|
for (Shard sh : strat) {
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig());
|
logger.debug("Start : " + sh.getGenomeLoc().getStart() + " stop : " + sh.getGenomeLoc().getStop() + " contig " + sh.getGenomeLoc().getContig());
|
||||||
logger.debug("count = " + count);
|
logger.debug("count = " + count);
|
||||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh);
|
StingSAMIterator datum = data.seek(sh);
|
||||||
|
|
||||||
// for the first couple of shards make sure we can see the reads
|
// for the first couple of shards make sure we can see the reads
|
||||||
if (count < 5) {
|
if (count < 5) {
|
||||||
|
|
@ -126,7 +128,8 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
|
|
||||||
|
|
||||||
// setup the test files
|
// setup the test files
|
||||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam");
|
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"));
|
||||||
|
Reads reads = new Reads(fl);
|
||||||
|
|
||||||
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
|
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
|
||||||
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();
|
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();
|
||||||
|
|
@ -136,7 +139,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(fl);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
for (Shard sh : strat) {
|
for (Shard sh : strat) {
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
count++;
|
count++;
|
||||||
|
|
@ -144,7 +147,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh);
|
StingSAMIterator datum = data.seek(sh);
|
||||||
|
|
||||||
for (SAMRecord r : datum) {
|
for (SAMRecord r : datum) {
|
||||||
readCount++;
|
readCount++;
|
||||||
|
|
@ -163,15 +166,17 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
|
|
||||||
// setup the data and the counter before our second run
|
// setup the data and the counter before our second run
|
||||||
fl.clear();
|
fl.clear();
|
||||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-01A-01W.aligned.duplicates_marked.bam");
|
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-01A-01W.aligned.duplicates_marked.bam"));
|
||||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-10B-01W.aligned.duplicates_marked.bam");
|
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188-10B-01W.aligned.duplicates_marked.bam"));
|
||||||
|
reads = new Reads(fl);
|
||||||
|
|
||||||
count = 0;
|
count = 0;
|
||||||
// the sharding strat.
|
// the sharding strat.
|
||||||
strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||||
|
|
||||||
logger.debug("Pile two:");
|
logger.debug("Pile two:");
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(fl);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
for (Shard sh : strat) {
|
for (Shard sh : strat) {
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
count++;
|
count++;
|
||||||
|
|
@ -181,7 +186,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sh);
|
StingSAMIterator datum = data.seek(sh);
|
||||||
|
|
||||||
for (SAMRecord r : datum) {
|
for (SAMRecord r : datum) {
|
||||||
readCount++;
|
readCount++;
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
@ -46,7 +47,7 @@ import java.util.List;
|
||||||
public class SAMByReadsTest extends BaseTest {
|
public class SAMByReadsTest extends BaseTest {
|
||||||
|
|
||||||
private FastaSequenceFile2 seq;
|
private FastaSequenceFile2 seq;
|
||||||
private List<String> fl;
|
private List<File> fl;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function does the setup of our parser, before each method call.
|
* This function does the setup of our parser, before each method call.
|
||||||
|
|
@ -55,7 +56,7 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
*/
|
*/
|
||||||
@Before
|
@Before
|
||||||
public void doForEachTest() {
|
public void doForEachTest() {
|
||||||
fl = new ArrayList<String>();
|
fl = new ArrayList<File>();
|
||||||
|
|
||||||
// sequence
|
// sequence
|
||||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
||||||
|
|
@ -69,13 +70,15 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
logger.warn("Executing testTotalReadCount");
|
logger.warn("Executing testTotalReadCount");
|
||||||
|
|
||||||
// setup the test files
|
// setup the test files
|
||||||
fl.add("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam");
|
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
|
||||||
|
Reads reads = new Reads(fl);
|
||||||
|
|
||||||
final int targetReadCount = 5000;
|
final int targetReadCount = 5000;
|
||||||
|
|
||||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatterByReadCount(seq.getSequenceDictionary(),targetReadCount);
|
ShardStrategy shardStrategy = ShardStrategyFactory.shatterByReadCount(seq.getSequenceDictionary(),targetReadCount);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(fl);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
// check the total read count
|
// check the total read count
|
||||||
final int totalReads = 10000;
|
final int totalReads = 10000;
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
@ -48,7 +49,7 @@ import java.util.List;
|
||||||
public class BoundedReadIteratorTest extends BaseTest {
|
public class BoundedReadIteratorTest extends BaseTest {
|
||||||
|
|
||||||
/** the file list and the fasta sequence */
|
/** the file list and the fasta sequence */
|
||||||
private List<String> fl;
|
private List<File> fl;
|
||||||
private FastaSequenceFile2 seq;
|
private FastaSequenceFile2 seq;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -58,7 +59,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
||||||
*/
|
*/
|
||||||
@Before
|
@Before
|
||||||
public void doForEachTest() {
|
public void doForEachTest() {
|
||||||
fl = new ArrayList<String>();
|
fl = new ArrayList<File>();
|
||||||
|
|
||||||
// sequence
|
// sequence
|
||||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||||
|
|
@ -76,14 +77,15 @@ public class BoundedReadIteratorTest extends BaseTest {
|
||||||
|
|
||||||
|
|
||||||
// setup the test files
|
// setup the test files
|
||||||
fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam");
|
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"));
|
||||||
|
Reads reads = new Reads(fl);
|
||||||
|
|
||||||
// our target read
|
// our target read
|
||||||
final long boundedReadCount = 100;
|
final long boundedReadCount = 100;
|
||||||
long shardReadCount = 0;
|
long shardReadCount = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(fl);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
// make sure we have a shard
|
// make sure we have a shard
|
||||||
if (!strat.hasNext()) {
|
if (!strat.hasNext()) {
|
||||||
|
|
@ -92,8 +94,8 @@ public class BoundedReadIteratorTest extends BaseTest {
|
||||||
Shard sd = strat.next();
|
Shard sd = strat.next();
|
||||||
|
|
||||||
|
|
||||||
MergingSamRecordIterator2 datum = (MergingSamRecordIterator2)data.seek(sd);
|
StingSAMIterator datum = data.seek(sd);
|
||||||
MergingSamRecordIterator2 datum2 = (MergingSamRecordIterator2)data.seek(sd);
|
StingSAMIterator datum2 = data.seek(sd);
|
||||||
|
|
||||||
// check the reads in the shard
|
// check the reads in the shard
|
||||||
for (SAMRecord r : datum) {
|
for (SAMRecord r : datum) {
|
||||||
|
|
@ -102,7 +104,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the bounded iterator
|
// create the bounded iterator
|
||||||
BoundedReadIterator iter = new BoundedReadIterator(datum2, boundedReadCount);
|
BoundedReadIterator iter = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,datum2), boundedReadCount);
|
||||||
|
|
||||||
// now see how many reads are in the bounded iterator
|
// now see how many reads are in the bounded iterator
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest {
|
||||||
final int COUNT = 100;
|
final int COUNT = 100;
|
||||||
MyTestIterator it = new MyTestIterator();
|
MyTestIterator it = new MyTestIterator();
|
||||||
|
|
||||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
|
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
|
||||||
int countCheck = 0;
|
int countCheck = 0;
|
||||||
while (samIt.hasNext()) {
|
while (samIt.hasNext()) {
|
||||||
samIt.next();
|
samIt.next();
|
||||||
|
|
@ -116,7 +116,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest {
|
||||||
|
|
||||||
MyTestCloseableIterator it = new MyTestCloseableIterator();
|
MyTestCloseableIterator it = new MyTestCloseableIterator();
|
||||||
|
|
||||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
|
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
|
||||||
|
|
||||||
int countCheck = 0;
|
int countCheck = 0;
|
||||||
while (samIt.hasNext()) {
|
while (samIt.hasNext()) {
|
||||||
|
|
@ -133,7 +133,7 @@ public class StingSAMIteratorAdapterTest extends BaseTest {
|
||||||
|
|
||||||
MyTestCloseableIterator it = new MyTestCloseableIterator();
|
MyTestCloseableIterator it = new MyTestCloseableIterator();
|
||||||
|
|
||||||
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(it);
|
StingSAMIterator samIt = StingSAMIteratorAdapter.adapt(null,it);
|
||||||
|
|
||||||
|
|
||||||
int countCheck = 0;
|
int countCheck = 0;
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
|
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
@ -121,15 +122,7 @@ public class TraverseReadsTest extends BaseTest {
|
||||||
ref.getSequenceDictionary(),
|
ref.getSequenceDictionary(),
|
||||||
readSize);
|
readSize);
|
||||||
|
|
||||||
List<File> unpackedReads = null;
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||||
try {
|
|
||||||
unpackedReads = TraversalEngine.unpackReads(bamList);
|
|
||||||
}
|
|
||||||
catch (FileNotFoundException ex) {
|
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(unpackedReads);
|
|
||||||
dataSource.viewUnmappedReads(false);
|
dataSource.viewUnmappedReads(false);
|
||||||
|
|
||||||
countReadWalker.initialize();
|
countReadWalker.initialize();
|
||||||
|
|
@ -176,15 +169,8 @@ public class TraverseReadsTest extends BaseTest {
|
||||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||||
ref.getSequenceDictionary(),
|
ref.getSequenceDictionary(),
|
||||||
readSize);
|
readSize);
|
||||||
List<File> unpackedReads = null;
|
|
||||||
try {
|
|
||||||
unpackedReads = TraversalEngine.unpackReads(bamList);
|
|
||||||
}
|
|
||||||
catch (FileNotFoundException ex) {
|
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(unpackedReads);
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||||
dataSource.viewUnmappedReads(true);
|
dataSource.viewUnmappedReads(true);
|
||||||
|
|
||||||
countReadWalker.initialize();
|
countReadWalker.initialize();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue