From 9b182e30635fdbbc01bad3b59cbd3bc3e5bd57de Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 1 Jul 2009 18:23:35 +0000 Subject: [PATCH] Prep for documenting command-line arguments: delete some arguments that don't make sense any more given the state of the traversals and GATK input requirements: all_loci (replaced by walker annotation), max OTF sorts (bam files must be sorted and indexed), threaded io (replaced by data sharding framework). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1144 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GATKArgumentCollection.java | 22 ----- .../sting/gatk/GenomeAnalysisEngine.java | 5 - .../org/broadinstitute/sting/gatk/Reads.java | 12 --- .../simpleDataSources/SAMDataSource.java | 2 - .../gatk/iterators/SamQueryIterator.java | 93 ------------------- .../sting/gatk/iterators/SortSamIterator.java | 56 ----------- .../gatk/iterators/ThreadedIterator.java | 75 --------------- .../gatk/traversals/TraversalEngine.java | 21 ----- .../gatk/GATKArgumentCollectionTest.java | 3 - 9 files changed, 289 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index 8f9a014b0..389f302e3 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -96,18 +96,10 @@ public class GATKArgumentCollection { @Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false) public String outErrFileName = null; - @Element(required = false) - @Argument(fullName = "all_loci", shortName = "A", doc = "Should we process all loci, not just those covered by reads", required = false) - public Boolean walkAllLoci = false; - @Element(required = false) @Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false) public Integer maximumEngineIterations = -1; - @Element(required = false) - @Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false) - public Integer maximumReadSorts = null; - @Element(required = false) @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) public Integer BAMcompression = null; @@ -132,10 +124,6 @@ public class GATKArgumentCollection { @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations, nothing will be checked at runtime.", required = false) public Boolean unsafe = false; - @Element(required = false) - @Argument(fullName = "threaded_IO", shortName = "P", doc = "If set, enables threaded I/O operations", required = false) - public Boolean enabledThreadedIO = false; - @Element(required = false) @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false) public Boolean disableThreading = false; @@ -253,16 +241,9 @@ public class GATKArgumentCollection { if (!other.HAPMAPChipFile.equals(this.HAPMAPChipFile)) { return false; } - if (!other.enabledThreadedIO.equals(this.enabledThreadedIO)) { - return false; - } if (!other.unsafe.equals(this.unsafe)) { return false; } - if (( other.maximumReadSorts == null && this.maximumReadSorts != null ) || - ( other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts) )) { - return false; - } if (( other.BAMcompression == null && this.BAMcompression != null ) || ( other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression) )) { return false; @@ -279,9 +260,6 @@ public class GATKArgumentCollection { ( other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage) )) { return false; } - if (!other.walkAllLoci.equals(this.walkAllLoci)) { - return false; - } if (!other.outFileName.equals(this.outFileName)) { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 70158447c..58381f317 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -157,8 +157,6 @@ public class GenomeAnalysisEngine { // we need to verify different parameter based on the walker type if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) { // create the MicroScheduler - if( argCollection.walkAllLoci ) - Utils.scareUser("Argument --all_loci is deprecated. Please annotate your walker with @By(DataSource.REFERENCE) to perform a by-reference traversal."); microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); engine = microScheduler.getTraversalEngine(); } @@ -193,8 +191,6 @@ public class GenomeAnalysisEngine { engine.setReadFilters(sourceInfo); - engine.setThreadedIO(argCollection.enabledThreadedIO); - engine.setWalkOverAllSites(argCollection.walkAllLoci); engine.initialize(); } @@ -226,7 +222,6 @@ public class GenomeAnalysisEngine { getValidationStringency(), argCollection.downsampleFraction, argCollection.downsampleCoverage, - argCollection.maximumReadSorts, !argCollection.unsafe, argCollection.filterZeroMappingQualityReads ); } diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index a996aedb8..54ad4f429 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -30,7 +30,6 @@ public class Reads { private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private Double downsamplingFraction = null; private Integer downsampleToCoverage = null; - private Integer maxOnFlySorts = null; private Boolean beSafe = null; private Boolean filterZeroMappingQualityReads = null; @@ -66,14 +65,6 @@ public class Reads { return downsampleToCoverage; } - /** - * Get the maximum number of supported on-the-fly sorts. - * @return Max number of on-the-fly sorts. - */ - public Integer getMaxOnTheFlySorts() { - return maxOnFlySorts; - } - /** * Return whether to 'verify' the reads as we pass through them. * @return Whether to verify the reads. @@ -102,7 +93,6 @@ public class Reads { * @param strictness Stringency of reads file parsing. * @param downsampleFraction fraction of reads to downsample. * @param downsampleCoverage downsampling per-locus. - * @param maxOnFlySorts how many sorts to perform on-the-fly. * @param beSafe Whether to enable safety checking. * @param filterZeroMappingQualityReads whether to filter zero mapping quality reads. */ @@ -110,14 +100,12 @@ public class Reads { SAMFileReader.ValidationStringency strictness, Double downsampleFraction, Integer downsampleCoverage, - Integer maxOnFlySorts, Boolean beSafe, Boolean filterZeroMappingQualityReads ) { this.readsFiles = samFiles; this.validationStringency = strictness; this.downsamplingFraction = downsampleFraction; this.downsampleToCoverage = downsampleCoverage; - this.maxOnFlySorts = maxOnFlySorts; this.beSafe = beSafe; this.filterZeroMappingQualityReads = filterZeroMappingQualityReads; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index dbc47521c..cf2edaf43 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -133,7 +133,6 @@ public class SAMDataSource implements SimpleDataSource { iterator = TraversalEngine.applyDecoratingIterators(true, iterator, reads.getDownsamplingFraction(), - reads.getMaxOnTheFlySorts(), reads.getFilterZeroMappingQualityReads(), reads.getSafetyChecking()); } else if (shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.INTERVAL) { @@ -141,7 +140,6 @@ public class SAMDataSource implements SimpleDataSource { iterator = TraversalEngine.applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), - reads.getMaxOnTheFlySorts(), reads.getFilterZeroMappingQualityReads(), reads.getSafetyChecking()); } else { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java deleted file mode 100755 index 9697f68be..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/SamQueryIterator.java +++ /dev/null @@ -1,93 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import java.util.Arrays; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.utils.GenomeLoc; - -/** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Mar 16, 2009 - * Time: 6:08:08 PM - * To change this template use File | Settings | File Templates. - */ -public class SamQueryIterator implements Iterator { - - SAMFileReader reader = null; - - Iterator locIter = null; - CloseableIterator recordIter = null; - - public SamQueryIterator( SAMFileReader reader, ArrayList locs ) { - this.reader = reader; - - // Our internal contract for the class guarantees that locIter and recordIter are never null. - // Initialize them and seed them with empty data as necessary. - if(locs != null) { - // The user requested a specific set of locations, set up the iterators accordly. - locIter = locs.iterator(); - recordIter = new NullCloseableIterator(); - } - else { - // The user requested traversal of the entire SAM file. Handle that here. - // TODO: This would be better handled as a completely separate iterator. - locIter = new ArrayList().iterator(); - recordIter = reader.iterator(); - } - - bumpToNextSAMRecord(); - } - - public boolean hasNext() { - bumpToNextSAMRecord(); - return recordIter.hasNext(); - } - - public SAMRecord next() { - bumpToNextSAMRecord(); - return recordIter.next(); - } - - /** - * Bump the loc iterator to the next spot with a read. - * - * For simplicity's sake, bumpToNextSAMRecord() expects locIter and recordIter to be non-null, and - * guarantees that locIter and recordIter will be non-null after the bump. - */ - private void bumpToNextSAMRecord() { - // If there's a record still waiting in the current iterator, do nothing. - if( recordIter.hasNext() ) { - return; - } - - // Otherwise, find the next record. - recordIter.close(); - while( locIter.hasNext() ) { - GenomeLoc currentLoc = locIter.next(); - recordIter = reader.queryOverlapping( currentLoc.getContig(), - (int)currentLoc.getStart(), - (int)currentLoc.getStop() ); - if( recordIter.hasNext() ) - break; - else - recordIter.close(); - } - } - - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); - } - - private class NullCloseableIterator implements CloseableIterator { - public boolean hasNext() { return false; } - public T next() { throw new java.util.NoSuchElementException(); } - public void close() {} - public void remove() {} - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java deleted file mode 100755 index 9c4760970..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/SortSamIterator.java +++ /dev/null @@ -1,56 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import org.broadinstitute.sting.utils.ComparableSAMRecord; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.gatk.Reads; - -import net.sf.samtools.SAMRecord; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; - -// TODO: Deprecate? -// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron -public class SortSamIterator implements StingSAMIterator { - private Reads sourceInfo; - private Iterator it; - - public SortSamIterator(StingSAMIterator unsortedIter, int maxSorts) { - sourceInfo = unsortedIter.getSourceInfo(); - ArrayList list = new ArrayList(); - while (unsortedIter.hasNext()) { - list.add(new ComparableSAMRecord(unsortedIter.next())); - // limit how much can be sorted for now - if (list.size() > maxSorts) - throw new UnsupportedOperationException("Can not sort files with more than " + maxSorts + " reads on the fly!"); - } - Collections.sort(list); - it = list.iterator(); - } - - /** - * Retrieves information about reads sources. - * @return Info about the sources of reads. - */ - public Reads getSourceInfo() { - if( sourceInfo == null ) - throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); - return sourceInfo; - } - - public boolean hasNext() { return it.hasNext(); } - public SAMRecord next() { return it.next().getRecord(); } - - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); - } - - public void close() { - // nothing to do right now - } - - public Iterator iterator() { - return this; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java deleted file mode 100755 index 42e52085e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import org.apache.log4j.Logger; - -import java.util.Iterator; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Feb 24, 2009 - * Time: 10:24:38 AM - * To change this template use File | Settings | File Templates. - */ -public class ThreadedIterator implements Iterator, Runnable { - private Iterator it; - private final BlockingQueue queue; - private int nOps = 0; - private final int printStateFreq = -1; - - protected static Logger logger = Logger.getLogger(ThreadedIterator.class); - - public void run() { - try { - while (it.hasNext()) { - // Spin loop...if queue has remaining capacity, add more elements. - // Otherwise, yield to the thread(s) waiting for the data. - if(queue.remainingCapacity() > 0 ) { - synchronized(this) { - queue.put(it.next()); - printState("addNext"); - } - } - else - Thread.yield(); - } - } catch (InterruptedException ex) { - // bail out - ; - } - } - - public synchronized void printState(final String op) { - if ( printStateFreq != -1 && nOps++ % printStateFreq == 0 ) - logger.info(String.format(" [%s] Queue has %d elements %d ops%n", op, queue.size(), nOps)); - } - - public ThreadedIterator(Iterator it, int buffSize) { - this.it = it; - queue = new LinkedBlockingQueue(buffSize); - new Thread(this).start(); - } - - public synchronized boolean hasNext() { - return queue.peek() != null || it.hasNext(); - } - - public T next() { - printState("getNext"); - try { - return queue.poll(10, TimeUnit.SECONDS); - } catch (InterruptedException ex) { - // bail out - logger.info("ThreadedIterator next() timed out..."); - printState("getNext"); - return null; - } - } - - public void remove () { - throw new UnsupportedOperationException(); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index f9fde41ca..939df9c31 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -71,8 +71,6 @@ public abstract class TraversalEngine { protected int maxOnFlySorts = 100000; protected double downsamplingFraction = 1.0; protected int downsamplingCoverage = 0; - protected boolean THREADED_IO = false; - protected int THREADED_IO_BUFFER_SIZE = 10000; protected boolean filterZeroMappingQualityReads = false; @@ -126,14 +124,6 @@ public abstract class TraversalEngine { this.filterZeroMappingQualityReads = filterZeroMappingQualityReads; } - public void setThreadedIO(final boolean threadedIO) { - this.THREADED_IO = threadedIO; - } - - public void setWalkOverAllSites(final boolean walkOverAllSites) { - this.walkOverAllSites = walkOverAllSites; - } - private void setDebugging(final boolean d) { DEBUGGING = d; } @@ -179,7 +169,6 @@ public abstract class TraversalEngine { public void setReadFilters( Reads reads) { if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction()); if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage()); - if( reads.getMaxOnTheFlySorts() != null ) setSortOnFly(reads.getMaxOnTheFlySorts()); if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking()); } @@ -346,11 +335,6 @@ public abstract class TraversalEngine { StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(null,rawIterator); wrappedIterator = applyDecoratingIterators(enableVerification,wrappedIterator); - if (THREADED_IO) { - logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE)); - wrappedIterator = StingSAMIteratorAdapter.adapt(null,new ThreadedIterator(wrappedIterator, THREADED_IO_BUFFER_SIZE)); - } - return wrappedIterator; } @@ -365,7 +349,6 @@ public abstract class TraversalEngine { return applyDecoratingIterators(enableVerification, wrappedIterator, DOWNSAMPLE_BY_FRACTION ? downsamplingFraction : null, - SORT_ON_FLY ? maxOnFlySorts : null, filterZeroMappingQualityReads, beSafeP); } @@ -377,7 +360,6 @@ public abstract class TraversalEngine { public static StingSAMIterator applyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator, Double downsamplingFraction, - Integer maxOnFlySorts, Boolean filterZeroMappingQualityReads, Boolean beSafeP) { // NOTE: this (and other filtering) should be done before on-the-fly sorting @@ -385,9 +367,6 @@ public abstract class TraversalEngine { if (downsamplingFraction != null) wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); - if (maxOnFlySorts != null) - wrappedIterator = new SortSamIterator(wrappedIterator, maxOnFlySorts); - if (beSafeP != null && beSafeP && enableVerification) wrappedIterator = new VerifyingSamIterator(wrappedIterator); diff --git a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java index 2981af995..f41ac1eee 100755 --- a/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java +++ b/java/test/org/broadinstitute/sting/gatk/GATKArgumentCollectionTest.java @@ -85,14 +85,11 @@ public class GATKArgumentCollectionTest extends BaseTest { collect.DBSNPFile = "DBSNPFile".toLowerCase(); collect.HAPMAPFile = "HAPMAPFile".toLowerCase(); collect.HAPMAPChipFile = "HAPMAPChipFile".toLowerCase(); - collect.enabledThreadedIO = true; collect.unsafe = false; - collect.maximumReadSorts = null; collect.downsampleFraction = null; collect.downsampleCoverage = null; collect.intervals = new ArrayList(); collect.intervals.add("intervals".toLowerCase()); - collect.walkAllLoci = true; collect.disableThreading = false; collect.outFileName = "outFileName".toLowerCase(); collect.errFileName = "errFileName".toLowerCase();