Prep for documenting command-line arguments: delete some arguments that don't make sense any more given

the state of the traversals and GATK input requirements: all_loci (replaced by walker annotation), max
OTF sorts (bam files must be sorted and indexed), threaded io (replaced by data sharding framework).


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-07-01 18:23:35 +00:00
parent 5a5103cfd2
commit 9b182e3063
9 changed files with 0 additions and 289 deletions

View File

@ -96,18 +96,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false) @Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
public String outErrFileName = null; public String outErrFileName = null;
@Element(required = false)
@Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false)
public Boolean walkAllLoci = false;
@Element(required = false) @Element(required = false)
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false) @Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
public Integer maximumEngineIterations = -1; public Integer maximumEngineIterations = -1;
@Element(required = false)
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
public Integer maximumReadSorts = null;
@Element(required = false) @Element(required = false)
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
public Integer BAMcompression = null; public Integer BAMcompression = null;
@ -132,10 +124,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false)
public Boolean unsafe = false; public Boolean unsafe = false;
@Element(required = false)
@Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false)
public Boolean enabledThreadedIO = false;
@Element(required = false) @Element(required = false)
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false) @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
public Boolean disableThreading = false; public Boolean disableThreading = false;
@ -253,16 +241,9 @@ public class GATKArgumentCollection {
if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) { if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) {
return false; return false;
} }
if (!other.enabledThreadedIO.equals(this.enabledThreadedIO)) {
return false;
}
if (!other.unsafe.equals(this.unsafe)) { if (!other.unsafe.equals(this.unsafe)) {
return false; return false;
} }
if (( other.maximumReadSorts == null && this.maximumReadSorts != null ) ||
( other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts) )) {
return false;
}
if (( other.BAMcompression == null && this.BAMcompression != null ) || if (( other.BAMcompression == null && this.BAMcompression != null ) ||
( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) { ( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) {
return false; return false;
@ -279,9 +260,6 @@ public class GATKArgumentCollection {
( other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage) )) { ( other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage) )) {
return false; return false;
} }
if (!other.walkAllLoci.equals(this.walkAllLoci)) {
return false;
}
if (!other.outFileName.equals(this.outFileName)) { if (!other.outFileName.equals(this.outFileName)) {
return false; return false;
} }

View File

@ -157,8 +157,6 @@ public class GenomeAnalysisEngine {
// we need to verify different parameter based on the walker type // we need to verify different parameter based on the walker type
if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) { if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) {
// create the MicroScheduler // create the MicroScheduler
if( argCollection.walkAllLoci )
Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal.");
microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads);
engine = microScheduler.getTraversalEngine(); engine = microScheduler.getTraversalEngine();
} }
@ -193,8 +191,6 @@ public class GenomeAnalysisEngine {
engine.setReadFilters(sourceInfo); engine.setReadFilters(sourceInfo);
engine.setThreadedIO(argCollection.enabledThreadedIO);
engine.setWalkOverAllSites(argCollection.walkAllLoci);
engine.initialize(); engine.initialize();
} }
@ -226,7 +222,6 @@ public class GenomeAnalysisEngine {
getValidationStringency(), getValidationStringency(),
argCollection.downsampleFraction, argCollection.downsampleFraction,
argCollection.downsampleCoverage, argCollection.downsampleCoverage,
argCollection.maximumReadSorts,
!argCollection.unsafe, !argCollection.unsafe,
argCollection.filterZeroMappingQualityReads ); argCollection.filterZeroMappingQualityReads );
} }

View File

@ -30,7 +30,6 @@ public class Reads {
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Double downsamplingFraction = null; private Double downsamplingFraction = null;
private Integer downsampleToCoverage = null; private Integer downsampleToCoverage = null;
private Integer maxOnFlySorts = null;
private Boolean beSafe = null; private Boolean beSafe = null;
private Boolean filterZeroMappingQualityReads = null; private Boolean filterZeroMappingQualityReads = null;
@ -66,14 +65,6 @@ public class Reads {
return downsampleToCoverage; return downsampleToCoverage;
} }
/**
* Get the maximum number of supported on-the-fly sorts.
* @return Max number of on-the-fly sorts.
*/
public Integer getMaxOnTheFlySorts() {
return maxOnFlySorts;
}
/** /**
* Return whether to 'verify' the reads as we pass through them. * Return whether to 'verify' the reads as we pass through them.
* @return Whether to verify the reads. * @return Whether to verify the reads.
@ -102,7 +93,6 @@ public class Reads {
* @param strictness Stringency of reads file parsing. * @param strictness Stringency of reads file parsing.
* @param downsampleFraction fraction of reads to downsample. * @param downsampleFraction fraction of reads to downsample.
* @param downsampleCoverage downsampling per-locus. * @param downsampleCoverage downsampling per-locus.
* @param maxOnFlySorts how many sorts to perform on-the-fly.
* @param beSafe Whether to enable safety checking. * @param beSafe Whether to enable safety checking.
* @param filterZeroMappingQualityReads whether to filter zero mapping quality reads. * @param filterZeroMappingQualityReads whether to filter zero mapping quality reads.
*/ */
@ -110,14 +100,12 @@ public class Reads {
SAMFileReader.ValidationStringency strictness, SAMFileReader.ValidationStringency strictness,
Double downsampleFraction, Double downsampleFraction,
Integer downsampleCoverage, Integer downsampleCoverage,
Integer maxOnFlySorts,
Boolean beSafe, Boolean beSafe,
Boolean filterZeroMappingQualityReads ) { Boolean filterZeroMappingQualityReads ) {
this.readsFiles = samFiles; this.readsFiles = samFiles;
this.validationStringency = strictness; this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction; this.downsamplingFraction = downsampleFraction;
this.downsampleToCoverage = downsampleCoverage; this.downsampleToCoverage = downsampleCoverage;
this.maxOnFlySorts = maxOnFlySorts;
this.beSafe = beSafe; this.beSafe = beSafe;
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads; this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
} }

View File

@ -133,7 +133,6 @@ public class SAMDataSource implements SimpleDataSource {
iterator = TraversalEngine.applyDecoratingIterators(true, iterator = TraversalEngine.applyDecoratingIterators(true,
iterator, iterator,
reads.getDownsamplingFraction(), reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(),
reads.getFilterZeroMappingQualityReads(), reads.getFilterZeroMappingQualityReads(),
reads.getSafetyChecking()); reads.getSafetyChecking());
} else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) { } else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) {
@ -141,7 +140,6 @@ public class SAMDataSource implements SimpleDataSource {
iterator = TraversalEngine.applyDecoratingIterators(false, iterator = TraversalEngine.applyDecoratingIterators(false,
iterator, iterator,
reads.getDownsamplingFraction(), reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(),
reads.getFilterZeroMappingQualityReads(), reads.getFilterZeroMappingQualityReads(),
reads.getSafetyChecking()); reads.getSafetyChecking());
} else { } else {

View File

@ -1,93 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Mar 16, 2009
* Time: 6:08:08 PM
* To change this template use File | Settings | File Templates.
*/
public class SamQueryIterator implements Iterator<SAMRecord> {
SAMFileReader reader = null;
Iterator<GenomeLoc> locIter = null;
CloseableIterator<SAMRecord> recordIter = null;
public SamQueryIterator( SAMFileReader reader, ArrayList<GenomeLoc> locs ) {
this.reader = reader;
// Our internal contract for the class guarantees that locIter and recordIter are never null.
// Initialize them and seed them with empty data as necessary.
if(locs != null) {
// The user requested a specific set of locations, set up the iterators accordly.
locIter = locs.iterator();
recordIter = new NullCloseableIterator<SAMRecord>();
}
else {
// The user requested traversal of the entire SAM file. Handle that here.
// TODO: This would be better handled as a completely separate iterator.
locIter = new ArrayList<GenomeLoc>().iterator();
recordIter = reader.iterator();
}
bumpToNextSAMRecord();
}
public boolean hasNext() {
bumpToNextSAMRecord();
return recordIter.hasNext();
}
public SAMRecord next() {
bumpToNextSAMRecord();
return recordIter.next();
}
/**
* Bump the loc iterator to the next spot with a read.
*
* For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and
* guarantees that locIter and recordIter will be non-null after the bump.
*/
private void bumpToNextSAMRecord() {
// If there's a record still waiting in the current iterator, do nothing.
if( recordIter.hasNext() ) {
return;
}
// Otherwise, find the next record.
recordIter.close();
while( locIter.hasNext() ) {
GenomeLoc currentLoc = locIter.next();
recordIter = reader.queryOverlapping( currentLoc.getContig(),
(int)currentLoc.getStart(),
(int)currentLoc.getStop() );
if( recordIter.hasNext() )
break;
else
recordIter.close();
}
}
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
private class NullCloseableIterator<T> implements CloseableIterator<T> {
public boolean hasNext() { return false; }
public T next() { throw new java.util.NoSuchElementException(); }
public void close() {}
public void remove() {}
}
}

View File

@ -1,56 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.utils.ComparableSAMRecord;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.Reads;
import net.sf.samtools.SAMRecord;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
// TODO: Deprecate?
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
public class SortSamIterator implements StingSAMIterator {
private Reads sourceInfo;
private Iterator<ComparableSAMRecord> it;
public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) {
sourceInfo = unsortedIter.getSourceInfo();
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
while (unsortedIter.hasNext()) {
list.add(new ComparableSAMRecord(unsortedIter.next()));
// limit how much can be sorted for now
if (list.size() > maxSorts)
throw new UnsupportedOperationException("Can not sort files with more than " + maxSorts + " reads on the fly!");
}
Collections.sort(list);
it = list.iterator();
}
/**
* Retrieves information about reads sources.
* @return Info about the sources of reads.
*/
public Reads getSourceInfo() {
if( sourceInfo == null )
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
return sourceInfo;
}
public boolean hasNext() { return it.hasNext(); }
public SAMRecord next() { return it.next().getRecord(); }
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
public void close() {
// nothing to do right now
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -1,75 +0,0 @@
package org.broadinstitute.sting.gatk.iterators;
import org.apache.log4j.Logger;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Feb 24, 2009
* Time: 10:24:38 AM
* To change this template use File | Settings | File Templates.
*/
public class ThreadedIterator<T> implements Iterator<T>, Runnable {
private Iterator<T> it;
private final BlockingQueue<T> queue;
private int nOps = 0;
private final int printStateFreq = -1;
protected static Logger logger = Logger.getLogger(ThreadedIterator.class);
public void run() {
try {
while (it.hasNext()) {
// Spin loop...if queue has remaining capacity, add more elements.
// Otherwise, yield to the thread(s) waiting for the data.
if(queue.remainingCapacity() > 0 ) {
synchronized(this) {
queue.put(it.next());
printState("addNext");
}
}
else
Thread.yield();
}
} catch (InterruptedException ex) {
// bail out
;
}
}
public synchronized void printState(final String op) {
if ( printStateFreq != -1 && nOps++ % printStateFreq == 0 )
logger.info(String.format(" [%s] Queue has %d elements %d ops%n", op, queue.size(), nOps));
}
public ThreadedIterator(Iterator<T> it, int buffSize) {
this.it = it;
queue = new LinkedBlockingQueue<T>(buffSize);
new Thread(this).start();
}
public synchronized boolean hasNext() {
return queue.peek() != null || it.hasNext();
}
public T next() {
printState("getNext");
try {
return queue.poll(10, TimeUnit.SECONDS);
} catch (InterruptedException ex) {
// bail out
logger.info("ThreadedIterator next() timed out...");
printState("getNext");
return null;
}
}
public void remove () {
throw new UnsupportedOperationException();
}
}

View File

@ -71,8 +71,6 @@ public abstract class TraversalEngine {
protected int maxOnFlySorts = 100000; protected int maxOnFlySorts = 100000;
protected double downsamplingFraction = 1.0; protected double downsamplingFraction = 1.0;
protected int downsamplingCoverage = 0; protected int downsamplingCoverage = 0;
protected boolean THREADED_IO = false;
protected int THREADED_IO_BUFFER_SIZE = 10000;
protected boolean filterZeroMappingQualityReads = false; protected boolean filterZeroMappingQualityReads = false;
@ -126,14 +124,6 @@ public abstract class TraversalEngine {
this.filterZeroMappingQualityReads = filterZeroMappingQualityReads; this.filterZeroMappingQualityReads = filterZeroMappingQualityReads;
} }
public void setThreadedIO(final boolean threadedIO) {
this.THREADED_IO = threadedIO;
}
public void setWalkOverAllSites(final boolean walkOverAllSites) {
this.walkOverAllSites = walkOverAllSites;
}
private void setDebugging(final boolean d) { private void setDebugging(final boolean d) {
DEBUGGING = d; DEBUGGING = d;
} }
@ -179,7 +169,6 @@ public abstract class TraversalEngine {
public void setReadFilters( Reads reads) { public void setReadFilters( Reads reads) {
if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction()); if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction());
if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage()); if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage());
if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts());
if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking()); if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking());
} }
@ -346,11 +335,6 @@ public abstract class TraversalEngine {
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator); StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator);
wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator); wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator);
if (THREADED_IO) {
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
}
return wrappedIterator; return wrappedIterator;
} }
@ -365,7 +349,6 @@ public abstract class TraversalEngine {
return applyDecoratingIterators(enableVerification, return applyDecoratingIterators(enableVerification,
wrappedIterator, wrappedIterator,
DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null, DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null,
SORT_ON_FLY ? maxOnFlySorts : null,
filterZeroMappingQualityReads, filterZeroMappingQualityReads,
beSafeP); beSafeP);
} }
@ -377,7 +360,6 @@ public abstract class TraversalEngine {
public static StingSAMIterator applyDecoratingIterators(boolean enableVerification, public static StingSAMIterator applyDecoratingIterators(boolean enableVerification,
StingSAMIterator wrappedIterator, StingSAMIterator wrappedIterator,
Double downsamplingFraction, Double downsamplingFraction,
Integer maxOnFlySorts,
Boolean filterZeroMappingQualityReads, Boolean filterZeroMappingQualityReads,
Boolean beSafeP) { Boolean beSafeP) {
// NOTE: this (and other filtering) should be done before on-the-fly sorting // NOTE: this (and other filtering) should be done before on-the-fly sorting
@ -385,9 +367,6 @@ public abstract class TraversalEngine {
if (downsamplingFraction != null) if (downsamplingFraction != null)
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
if (maxOnFlySorts != null)
wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts);
if (beSafeP != null && beSafeP && enableVerification) if (beSafeP != null && beSafeP && enableVerification)
wrappedIterator = new VerifyingSamIterator(wrappedIterator); wrappedIterator = new VerifyingSamIterator(wrappedIterator);

View File

@ -85,14 +85,11 @@ public class GATKArgumentCollectionTest extends BaseTest {
collect.DBSNPFile = "DBSNPFile".toLowerCase(); collect.DBSNPFile = "DBSNPFile".toLowerCase();
collect.HAPMAPFile = "HAPMAPFile".toLowerCase(); collect.HAPMAPFile = "HAPMAPFile".toLowerCase();
collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase(); collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase();
collect.enabledThreadedIO = true;
collect.unsafe = false; collect.unsafe = false;
collect.maximumReadSorts = null;
collect.downsampleFraction = null; collect.downsampleFraction = null;
collect.downsampleCoverage = null; collect.downsampleCoverage = null;
collect.intervals = new ArrayList<String>(); collect.intervals = new ArrayList<String>();
collect.intervals.add("intervals".toLowerCase()); collect.intervals.add("intervals".toLowerCase());
collect.walkAllLoci = true;
collect.disableThreading = false; collect.disableThreading = false;
collect.outFileName = "outFileName".toLowerCase(); collect.outFileName = "outFileName".toLowerCase();
collect.errFileName = "errFileName".toLowerCase(); collect.errFileName = "errFileName".toLowerCase();