Prep for documenting command-line arguments: delete some arguments that don't make sense any more given

the state of the traversals and GATK input requirements: all_loci (replaced by walker annotation), max
OTF sorts (bam files must be sorted and indexed), threaded io (replaced by data sharding framework).


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-07-01 18:23:35 +00:00
parent 5a5103cfd2
commit 9b182e3063
9 changed files with 0 additions and 289 deletions

View File

@ -96,18 +96,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
public String outErrFileName = null;
@Element(required = false)
@Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
public Boolean walkAllLoci = false;
@Element(required = false)
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
public Integer maximumEngineIterations = -1;
@Element(required = false)
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
public Integer maximumReadSorts = null;
@Element(required = false)
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = null;
@ -132,10 +124,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
public Boolean unsafe = false;
@Element(required = false)
@Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false)
public Boolean enabledThreadedIO = false;
@Element(required = false)
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
public Boolean disableThreading = false;
@ -253,16 +241,9 @@ public class GATKArgumentCollection {
if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) {
return false;
}
if (!other.enabledThreadedIO.equals(this.enabledThreadedIO)) {
return false;
}
if (!other.unsafe.equals(this.unsafe)) {
return false;
}
if (( other.maximumReadSorts == null && this.maximumReadSorts != null ) ||
( other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts) )) {
return false;
}
if (( other.BAMcompression == null && this.BAMcompression != null ) ||
( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) {
return false;
@ -279,9 +260,6 @@ public class GATKArgumentCollection {
( other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage) )) {
return false;
}
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
return false;
}
if (!other.outFileName.equals(this.outFileName)) {
return false;
}

View File

@ -157,8 +157,6 @@ public class GenomeAnalysisEngine {
// we need to verify different parameter based on the walker type
if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) {
// create the MicroScheduler
if( argCollection.walkAllLoci )
Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal.");
microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
engine = microScheduler.getTraversalEngine();
}
@ -193,8 +191,6 @@ public class GenomeAnalysisEngine {
engine.setReadFilters(sourceInfo);
engine.setThreadedIO(argCollection.enabledThreadedIO);
engine.setWalkOverAllSites(argCollection.walkAllLoci);
engine.initialize();
}
@ -226,7 +222,6 @@ public class GenomeAnalysisEngine {
getValidationStringency(),
argCollection.downsampleFraction,
argCollection.downsampleCoverage,
argCollection.maximumReadSorts,
!argCollection.unsafe,
argCollection.filterZeroMappingQualityReads );
}

View File

@ -30,7 +30,6 @@ public class Reads {
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Double downsamplingFraction = null;
private Integer downsampleToCoverage = null;
private Integer maxOnFlySorts = null;
private Boolean beSafe = null;
private Boolean filterZeroMappingQualityReads = null;
@ -66,14 +65,6 @@ public class Reads {
return downsampleToCoverage;
}
/**
* Get the maximum number of supported on-the-fly sorts.
* @return Max number of on-the-fly sorts.
*/
public Integer getMaxOnTheFlySorts() {
return maxOnFlySorts;
}
/**
* Return whether to 'verify' the reads as we pass through them.
* @return Whether to verify the reads.
@ -102,7 +93,6 @@ public class Reads {
* @param strictness Stringency of reads file parsing.
* @param downsampleFraction fraction of reads to downsample.
* @param downsampleCoverage downsampling per-locus.
* @param maxOnFlySorts how many sorts to perform on-the-fly.
* @param beSafe Whether to enable safety checking.
* @param filterZeroMappingQualityReads whether to filter zero mapping quality reads.
*/
@ -110,14 +100,12 @@ public class Reads {
SAMFileReader.ValidationStringency strictness,
Double downsampleFraction,
Integer downsampleCoverage,
Integer maxOnFlySorts,
Boolean beSafe,
Boolean filterZeroMappingQualityReads ) {
this.readsFiles = samFiles;
this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction;
this.downsampleToCoverage = downsampleCoverage;
this.maxOnFlySorts = maxOnFlySorts;
this.beSafe = beSafe;
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
}

View File

@ -133,7 +133,6 @@ public class SAMDataSource implements SimpleDataSource {
iterator = TraversalEngine.applyDecoratingIterators(true,
iterator,
reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(),
reads.getFilterZeroMappingQualityReads(),
reads.getSafetyChecking());
} else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) {
@ -141,7 +140,6 @@ public class SAMDataSource implements SimpleDataSource {
iterator = TraversalEngine.applyDecoratingIterators(false,
iterator,
reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(),
reads.getFilterZeroMappingQualityReads(),
reads.getSafetyChecking());
} else {

View File

@ -1,93 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Mar 16, 2009
* Time: 6:08:08 PM
* To change this template use File | Settings | File Templates.
*/
public class SamQueryIterator implements Iterator<SAMRecord> {
SAMFileReader reader = null;
Iterator<GenomeLoc> locIter = null;
CloseableIterator<SAMRecord> recordIter = null;
public SamQueryIterator( SAMFileReader reader, ArrayList<GenomeLoc> locs ) {
this.reader = reader;
// Our internal contract for the class guarantees that locIter and recordIter are never null.
// Initialize them and seed them with empty data as necessary.
if(locs != null) {
// The user requested a specific set of locations, set up the iterators accordly.
locIter = locs.iterator();
recordIter = new NullCloseableIterator<SAMRecord>();
}
else {
// The user requested traversal of the entire SAM file. Handle that here.
// TODO: This would be better handled as a completely separate iterator.
locIter = new ArrayList<GenomeLoc>().iterator();
recordIter = reader.iterator();
}
bumpToNextSAMRecord();
}
public boolean hasNext() {
bumpToNextSAMRecord();
return recordIter.hasNext();
}
public SAMRecord next() {
bumpToNextSAMRecord();
return recordIter.next();
}
/**
* Bump the loc iterator to the next spot with a read.
*
* For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and
* guarantees that locIter and recordIter will be non-null after the bump.
*/
private void bumpToNextSAMRecord() {
// If there's a record still waiting in the current iterator, do nothing.
if( recordIter.hasNext() ) {
return;
}
// Otherwise, find the next record.
recordIter.close();
while( locIter.hasNext() ) {
GenomeLoc currentLoc = locIter.next();
recordIter = reader.queryOverlapping( currentLoc.getContig(),
(int)currentLoc.getStart(),
(int)currentLoc.getStop() );
if( recordIter.hasNext() )
break;
else
recordIter.close();
}
}
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
private class NullCloseableIterator<T> implements CloseableIterator<T> {
public boolean hasNext() { return false; }
public T next() { throw new java.util.NoSuchElementException(); }
public void close() {}
public void remove() {}
}
}

View File

@ -1,56 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.utils.ComparableSAMRecord;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.Reads;
import net.sf.samtools.SAMRecord;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
// TODO: Deprecate?
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
public class SortSamIterator implements StingSAMIterator {
private Reads sourceInfo;
private Iterator<ComparableSAMRecord> it;
public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) {
sourceInfo = unsortedIter.getSourceInfo();
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
while (unsortedIter.hasNext()) {
list.add(new ComparableSAMRecord(unsortedIter.next()));
// limit how much can be sorted for now
if (list.size() > maxSorts)
throw new UnsupportedOperationException("Can not sort files with more than " + maxSorts + " reads on the fly!");
}
Collections.sort(list);
it = list.iterator();
}
/**
* Retrieves information about reads sources.
* @return Info about the sources of reads.
*/
public Reads getSourceInfo() {
if( sourceInfo == null )
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
return sourceInfo;
}
public boolean hasNext() { return it.hasNext(); }
public SAMRecord next() { return it.next().getRecord(); }
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
public void close() {
// nothing to do right now
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -1,75 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import org.apache.log4j.Logger;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Feb 24, 2009
* Time: 10:24:38 AM
* To change this template use File | Settings | File Templates.
*/
public class ThreadedIterator<T> implements Iterator<T>, Runnable {
private Iterator<T> it;
private final BlockingQueue<T> queue;
private int nOps = 0;
private final int printStateFreq = -1;
protected static Logger logger = Logger.getLogger(ThreadedIterator.class);
public void run() {
try {
while (it.hasNext()) {
// Spin loop...if queue has remaining capacity, add more elements.
// Otherwise, yield to the thread(s) waiting for the data.
if(queue.remainingCapacity() > 0 ) {
synchronized(this) {
queue.put(it.next());
printState("addNext");
}
}
else
Thread.yield();
}
} catch (InterruptedException ex) {
// bail out
;
}
}
public synchronized void printState(final String op) {
if ( printStateFreq != -1 && nOps++ % printStateFreq == 0 )
logger.info(String.format(" [%s] Queue has %d elements %d ops%n", op, queue.size(), nOps));
}
public ThreadedIterator(Iterator<T> it, int buffSize) {
this.it = it;
queue = new LinkedBlockingQueue<T>(buffSize);
new Thread(this).start();
}
public synchronized boolean hasNext() {
return queue.peek() != null || it.hasNext();
}
public T next() {
printState("getNext");
try {
return queue.poll(10, TimeUnit.SECONDS);
} catch (InterruptedException ex) {
// bail out
logger.info("ThreadedIterator next() timed out...");
printState("getNext");
return null;
}
}
public void remove () {
throw new UnsupportedOperationException();
}
}

View File

@ -71,8 +71,6 @@ public abstract class TraversalEngine {
protected int maxOnFlySorts = 100000;
protected double downsamplingFraction = 1.0;
protected int downsamplingCoverage = 0;
protected boolean THREADED_IO = false;
protected int THREADED_IO_BUFFER_SIZE = 10000;
protected boolean filterZeroMappingQualityReads = false;
@ -126,14 +124,6 @@ public abstract class TraversalEngine {
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
}
public void setThreadedIO(final boolean threadedIO) {
this.THREADED_IO = threadedIO;
}
public void setWalkOverAllSites(final boolean walkOverAllSites) {
this.walkOverAllSites = walkOverAllSites;
}
private void setDebugging(final boolean d) {
DEBUGGING = d;
}
@ -179,7 +169,6 @@ public abstract class TraversalEngine {
public void setReadFilters( Reads reads) {
if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction());
if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage());
if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts());
if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking());
}
@ -346,11 +335,6 @@ public abstract class TraversalEngine {
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator);
wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator);
if (THREADED_IO) {
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
}
return wrappedIterator;
}
@ -365,7 +349,6 @@ public abstract class TraversalEngine {
return applyDecoratingIterators(enableVerification,
wrappedIterator,
DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null,
SORT_ON_FLY ? maxOnFlySorts : null,
filterZeroMappingQualityReads,
beSafeP);
}
@ -377,7 +360,6 @@ public abstract class TraversalEngine {
public static StingSAMIterator applyDecoratingIterators(boolean enableVerification,
StingSAMIterator wrappedIterator,
Double downsamplingFraction,
Integer maxOnFlySorts,
Boolean filterZeroMappingQualityReads,
Boolean beSafeP) {
// NOTE: this (and other filtering) should be done before on-the-fly sorting
@ -385,9 +367,6 @@ public abstract class TraversalEngine {
if (downsamplingFraction != null)
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
if (maxOnFlySorts != null)
wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts);
if (beSafeP != null && beSafeP && enableVerification)
wrappedIterator = new VerifyingSamIterator(wrappedIterator);

View File

@ -85,14 +85,11 @@ public class GATKArgumentCollectionTest extends BaseTest {
collect.DBSNPFile = "DBSNPFile".toLowerCase();
collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
collect.enabledThreadedIO = true;
collect.unsafe = false;
collect.maximumReadSorts = null;
collect.downsampleFraction = null;
collect.downsampleCoverage = null;
collect.intervals = new ArrayList<String>();
collect.intervals.add("intervals".toLowerCase());
collect.walkAllLoci = true;
collect.disableThreading = false;
collect.outFileName = "outFileName".toLowerCase();
collect.errFileName = "errFileName".toLowerCase();