Prep for documenting command-line arguments: delete some arguments that don't make sense any more given
the state of the traversals and GATK input requirements: all_loci (replaced by walker annotation), max on-the-fly (OTF) sorts (bam files must be sorted and indexed), threaded io (replaced by data sharding framework). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5a5103cfd2
commit
9b182e3063
|
|
@ -96,18 +96,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
|
||||
public String outErrFileName = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
|
||||
public Boolean walkAllLoci = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
|
||||
public Integer maximumEngineIterations = -1;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
|
||||
public Integer maximumReadSorts = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
||||
public Integer BAMcompression = null;
|
||||
|
|
@ -132,10 +124,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
|
||||
public Boolean unsafe = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false)
|
||||
public Boolean enabledThreadedIO = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||
public Boolean disableThreading = false;
|
||||
|
|
@ -253,16 +241,9 @@ public class GATKArgumentCollection {
|
|||
if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.enabledThreadedIO.equals(this.enabledThreadedIO)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
if (( other.maximumReadSorts == null && this.maximumReadSorts != null ) ||
|
||||
( other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts) )) {
|
||||
return false;
|
||||
}
|
||||
if (( other.BAMcompression == null && this.BAMcompression != null ) ||
|
||||
( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) {
|
||||
return false;
|
||||
|
|
@ -279,9 +260,6 @@ public class GATKArgumentCollection {
|
|||
( other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage) )) {
|
||||
return false;
|
||||
}
|
||||
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.outFileName.equals(this.outFileName)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -157,8 +157,6 @@ public class GenomeAnalysisEngine {
|
|||
// we need to verify different parameter based on the walker type
|
||||
if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) {
|
||||
// create the MicroScheduler
|
||||
if( argCollection.walkAllLoci )
|
||||
Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal.");
|
||||
microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
|
||||
engine = microScheduler.getTraversalEngine();
|
||||
}
|
||||
|
|
@ -193,8 +191,6 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
engine.setReadFilters(sourceInfo);
|
||||
|
||||
engine.setThreadedIO(argCollection.enabledThreadedIO);
|
||||
engine.setWalkOverAllSites(argCollection.walkAllLoci);
|
||||
engine.initialize();
|
||||
}
|
||||
|
||||
|
|
@ -226,7 +222,6 @@ public class GenomeAnalysisEngine {
|
|||
getValidationStringency(),
|
||||
argCollection.downsampleFraction,
|
||||
argCollection.downsampleCoverage,
|
||||
argCollection.maximumReadSorts,
|
||||
!argCollection.unsafe,
|
||||
argCollection.filterZeroMappingQualityReads );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ public class Reads {
|
|||
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
||||
private Double downsamplingFraction = null;
|
||||
private Integer downsampleToCoverage = null;
|
||||
private Integer maxOnFlySorts = null;
|
||||
private Boolean beSafe = null;
|
||||
private Boolean filterZeroMappingQualityReads = null;
|
||||
|
||||
|
|
@ -66,14 +65,6 @@ public class Reads {
|
|||
return downsampleToCoverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the maximum number of supported on-the-fly sorts.
|
||||
* @return Max number of on-the-fly sorts.
|
||||
*/
|
||||
public Integer getMaxOnTheFlySorts() {
|
||||
return maxOnFlySorts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether to 'verify' the reads as we pass through them.
|
||||
* @return Whether to verify the reads.
|
||||
|
|
@ -102,7 +93,6 @@ public class Reads {
|
|||
* @param strictness Stringency of reads file parsing.
|
||||
* @param downsampleFraction fraction of reads to downsample.
|
||||
* @param downsampleCoverage downsampling per-locus.
|
||||
* @param maxOnFlySorts how many sorts to perform on-the-fly.
|
||||
* @param beSafe Whether to enable safety checking.
|
||||
* @param filterZeroMappingQualityReads whether to filter zero mapping quality reads.
|
||||
*/
|
||||
|
|
@ -110,14 +100,12 @@ public class Reads {
|
|||
SAMFileReader.ValidationStringency strictness,
|
||||
Double downsampleFraction,
|
||||
Integer downsampleCoverage,
|
||||
Integer maxOnFlySorts,
|
||||
Boolean beSafe,
|
||||
Boolean filterZeroMappingQualityReads ) {
|
||||
this.readsFiles = samFiles;
|
||||
this.validationStringency = strictness;
|
||||
this.downsamplingFraction = downsampleFraction;
|
||||
this.downsampleToCoverage = downsampleCoverage;
|
||||
this.maxOnFlySorts = maxOnFlySorts;
|
||||
this.beSafe = beSafe;
|
||||
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,6 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
iterator = TraversalEngine.applyDecoratingIterators(true,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getMaxOnTheFlySorts(),
|
||||
reads.getFilterZeroMappingQualityReads(),
|
||||
reads.getSafetyChecking());
|
||||
} else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) {
|
||||
|
|
@ -141,7 +140,6 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
iterator = TraversalEngine.applyDecoratingIterators(false,
|
||||
iterator,
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getMaxOnTheFlySorts(),
|
||||
reads.getFilterZeroMappingQualityReads(),
|
||||
reads.getSafetyChecking());
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1,93 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: hanna
|
||||
* Date: Mar 16, 2009
|
||||
* Time: 6:08:08 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SamQueryIterator implements Iterator<SAMRecord> {
|
||||
|
||||
SAMFileReader reader = null;
|
||||
|
||||
Iterator<GenomeLoc> locIter = null;
|
||||
CloseableIterator<SAMRecord> recordIter = null;
|
||||
|
||||
public SamQueryIterator( SAMFileReader reader, ArrayList<GenomeLoc> locs ) {
|
||||
this.reader = reader;
|
||||
|
||||
// Our internal contract for the class guarantees that locIter and recordIter are never null.
|
||||
// Initialize them and seed them with empty data as necessary.
|
||||
if(locs != null) {
|
||||
// The user requested a specific set of locations, set up the iterators accordly.
|
||||
locIter = locs.iterator();
|
||||
recordIter = new NullCloseableIterator<SAMRecord>();
|
||||
}
|
||||
else {
|
||||
// The user requested traversal of the entire SAM file. Handle that here.
|
||||
// TODO: This would be better handled as a completely separate iterator.
|
||||
locIter = new ArrayList<GenomeLoc>().iterator();
|
||||
recordIter = reader.iterator();
|
||||
}
|
||||
|
||||
bumpToNextSAMRecord();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
bumpToNextSAMRecord();
|
||||
return recordIter.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
bumpToNextSAMRecord();
|
||||
return recordIter.next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Bump the loc iterator to the next spot with a read.
|
||||
*
|
||||
* For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and
|
||||
* guarantees that locIter and recordIter will be non-null after the bump.
|
||||
*/
|
||||
private void bumpToNextSAMRecord() {
|
||||
// If there's a record still waiting in the current iterator, do nothing.
|
||||
if( recordIter.hasNext() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise, find the next record.
|
||||
recordIter.close();
|
||||
while( locIter.hasNext() ) {
|
||||
GenomeLoc currentLoc = locIter.next();
|
||||
recordIter = reader.queryOverlapping( currentLoc.getContig(),
|
||||
(int)currentLoc.getStart(),
|
||||
(int)currentLoc.getStop() );
|
||||
if( recordIter.hasNext() )
|
||||
break;
|
||||
else
|
||||
recordIter.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private class NullCloseableIterator<T> implements CloseableIterator<T> {
|
||||
public boolean hasNext() { return false; }
|
||||
public T next() { throw new java.util.NoSuchElementException(); }
|
||||
public void close() {}
|
||||
public void remove() {}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
// TODO: Deprecate?
|
||||
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
|
||||
public class SortSamIterator implements StingSAMIterator {
|
||||
private Reads sourceInfo;
|
||||
private Iterator<ComparableSAMRecord> it;
|
||||
|
||||
public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) {
|
||||
sourceInfo = unsortedIter.getSourceInfo();
|
||||
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||
while (unsortedIter.hasNext()) {
|
||||
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||
// limit how much can be sorted for now
|
||||
if (list.size() > maxSorts)
|
||||
throw new UnsupportedOperationException("Can not sort files with more than " + maxSorts + " reads on the fly!");
|
||||
}
|
||||
Collections.sort(list);
|
||||
it = list.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves information about reads sources.
|
||||
* @return Info about the sources of reads.
|
||||
*/
|
||||
public Reads getSourceInfo() {
|
||||
if( sourceInfo == null )
|
||||
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
|
||||
return sourceInfo;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return it.hasNext(); }
|
||||
public SAMRecord next() { return it.next().getRecord(); }
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
public void close() {
|
||||
// nothing to do right now
|
||||
}
|
||||
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: Feb 24, 2009
|
||||
* Time: 10:24:38 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ThreadedIterator<T> implements Iterator<T>, Runnable {
|
||||
private Iterator<T> it;
|
||||
private final BlockingQueue<T> queue;
|
||||
private int nOps = 0;
|
||||
private final int printStateFreq = -1;
|
||||
|
||||
protected static Logger logger = Logger.getLogger(ThreadedIterator.class);
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
while (it.hasNext()) {
|
||||
// Spin loop...if queue has remaining capacity, add more elements.
|
||||
// Otherwise, yield to the thread(s) waiting for the data.
|
||||
if(queue.remainingCapacity() > 0 ) {
|
||||
synchronized(this) {
|
||||
queue.put(it.next());
|
||||
printState("addNext");
|
||||
}
|
||||
}
|
||||
else
|
||||
Thread.yield();
|
||||
}
|
||||
} catch (InterruptedException ex) {
|
||||
// bail out
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void printState(final String op) {
|
||||
if ( printStateFreq != -1 && nOps++ % printStateFreq == 0 )
|
||||
logger.info(String.format(" [%s] Queue has %d elements %d ops%n", op, queue.size(), nOps));
|
||||
}
|
||||
|
||||
public ThreadedIterator(Iterator<T> it, int buffSize) {
|
||||
this.it = it;
|
||||
queue = new LinkedBlockingQueue<T>(buffSize);
|
||||
new Thread(this).start();
|
||||
}
|
||||
|
||||
public synchronized boolean hasNext() {
|
||||
return queue.peek() != null || it.hasNext();
|
||||
}
|
||||
|
||||
public T next() {
|
||||
printState("getNext");
|
||||
try {
|
||||
return queue.poll(10, TimeUnit.SECONDS);
|
||||
} catch (InterruptedException ex) {
|
||||
// bail out
|
||||
logger.info("ThreadedIterator next() timed out...");
|
||||
printState("getNext");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public void remove () {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
|
@ -71,8 +71,6 @@ public abstract class TraversalEngine {
|
|||
protected int maxOnFlySorts = 100000;
|
||||
protected double downsamplingFraction = 1.0;
|
||||
protected int downsamplingCoverage = 0;
|
||||
protected boolean THREADED_IO = false;
|
||||
protected int THREADED_IO_BUFFER_SIZE = 10000;
|
||||
protected boolean filterZeroMappingQualityReads = false;
|
||||
|
||||
|
||||
|
|
@ -126,14 +124,6 @@ public abstract class TraversalEngine {
|
|||
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
|
||||
}
|
||||
|
||||
public void setThreadedIO(final boolean threadedIO) {
|
||||
this.THREADED_IO = threadedIO;
|
||||
}
|
||||
|
||||
public void setWalkOverAllSites(final boolean walkOverAllSites) {
|
||||
this.walkOverAllSites = walkOverAllSites;
|
||||
}
|
||||
|
||||
private void setDebugging(final boolean d) {
|
||||
DEBUGGING = d;
|
||||
}
|
||||
|
|
@ -179,7 +169,6 @@ public abstract class TraversalEngine {
|
|||
public void setReadFilters( Reads reads) {
|
||||
if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction());
|
||||
if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage());
|
||||
if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts());
|
||||
if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking());
|
||||
}
|
||||
|
||||
|
|
@ -346,11 +335,6 @@ public abstract class TraversalEngine {
|
|||
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator);
|
||||
wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator);
|
||||
|
||||
if (THREADED_IO) {
|
||||
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
|
||||
}
|
||||
|
||||
return wrappedIterator;
|
||||
}
|
||||
|
||||
|
|
@ -365,7 +349,6 @@ public abstract class TraversalEngine {
|
|||
return applyDecoratingIterators(enableVerification,
|
||||
wrappedIterator,
|
||||
DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null,
|
||||
SORT_ON_FLY ? maxOnFlySorts : null,
|
||||
filterZeroMappingQualityReads,
|
||||
beSafeP);
|
||||
}
|
||||
|
|
@ -377,7 +360,6 @@ public abstract class TraversalEngine {
|
|||
public static StingSAMIterator applyDecoratingIterators(boolean enableVerification,
|
||||
StingSAMIterator wrappedIterator,
|
||||
Double downsamplingFraction,
|
||||
Integer maxOnFlySorts,
|
||||
Boolean filterZeroMappingQualityReads,
|
||||
Boolean beSafeP) {
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
|
|
@ -385,9 +367,6 @@ public abstract class TraversalEngine {
|
|||
if (downsamplingFraction != null)
|
||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||
|
||||
if (maxOnFlySorts != null)
|
||||
wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts);
|
||||
|
||||
if (beSafeP != null && beSafeP && enableVerification)
|
||||
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
|
||||
|
||||
|
|
|
|||
|
|
@ -85,14 +85,11 @@ public class GATKArgumentCollectionTest extends BaseTest {
|
|||
collect.DBSNPFile = "DBSNPFile".toLowerCase();
|
||||
collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
|
||||
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
|
||||
collect.enabledThreadedIO = true;
|
||||
collect.unsafe = false;
|
||||
collect.maximumReadSorts = null;
|
||||
collect.downsampleFraction = null;
|
||||
collect.downsampleCoverage = null;
|
||||
collect.intervals = new ArrayList<String>();
|
||||
collect.intervals.add("intervals".toLowerCase());
|
||||
collect.walkAllLoci = true;
|
||||
collect.disableThreading = false;
|
||||
collect.outFileName = "outFileName".toLowerCase();
|
||||
collect.errFileName = "errFileName".toLowerCase();
|
||||
|
|
|
|||
Loading…
Reference in New Issue