From a3e0ec20c4fbbd454930723dcd677acf8f73a1b6 Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 1 Jul 2009 13:46:35 +0000 Subject: [PATCH] Kill the TraverseByLocusWindows traversal. TraverseLocusWindows will take its place. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1138 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GATKArgumentCollection.java | 4 - .../sting/gatk/GenomeAnalysisEngine.java | 41 +--- .../traversals/TraverseByLocusWindows.java | 198 ------------------ 3 files changed, 1 insertion(+), 242 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index 65fdfd620..8f9a014b0 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -140,10 +140,6 @@ public class GATKArgumentCollection { @Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false) public Boolean disableThreading = false; - @Element(required = false) - @Argument(fullName = "use_new_tblw", shortName= "tblw", doc="Use the legacy traverse by locus window.", required = false) - public Boolean useNewTraverseByLocusWindow = false; - /** How many threads should be allocated to this analysis. */ @Element(required = false) @Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false) diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 4809e3490..70158447c 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -116,14 +116,7 @@ public class GenomeAnalysisEngine { // our microscheduler, which is in charge of running everything MicroScheduler microScheduler = null; - // if we're a read or a locus walker, we use the new system. Right now we have complicated - // branching based on the input data, but this should disapear when all the traversals are switched over - if (my_walker instanceof LocusWindowWalker && !args.useNewTraverseByLocusWindow) { - legacyTraversal(my_walker, rods); - return; - } else { // we have an old style traversal, once we're done return - microScheduler = createMicroscheduler(my_walker, rods); - } + microScheduler = createMicroscheduler(my_walker, rods); // Prepare the sort ordering w.r.t. the sequence dictionary if (argCollection.referenceFile != null) { @@ -150,38 +143,6 @@ public class GenomeAnalysisEngine { walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations); } - /** - * this is to accomdate the older style traversals, that haven't been converted over to the new system. Putting them - * into their own function allows us to deviate in the two behaviors so the new style traversals aren't limited to what - * the old style does. As traversals are converted, this function should disappear. - * - * @param my_walker the walker to run - * @param rods the rod tracks to associate with - */ - private void legacyTraversal(Walker my_walker, List> rods) { - if (my_walker instanceof LocusWindowWalker) { - this.engine = new TraverseByLocusWindows(argCollection.samFiles, argCollection.referenceFile, rods); - } else { - throw new RuntimeException("Unexpected walker type: " + my_walker); - } - - // Prepare the sort ordering w.r.t. the sequence dictionary - if (argCollection.referenceFile != null) { - final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); - GenomeLocParser.setupRefContigOrdering(refFile); - } - - // Determine the validation stringency. Default to ValidationStringency.STRICT. - ValidationStringency strictness = getValidationStringency(); - - logger.info("Strictness is " + strictness); - genericEngineSetup(); - - // store the results of the walker run - walkerReturn = engine.traverse(my_walker); - - } - /** * setup a microscheduler * diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java deleted file mode 100755 index 3d7b5d519..000000000 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByLocusWindows.java +++ /dev/null @@ -1,198 +0,0 @@ -package org.broadinstitute.sting.gatk.traversals; - -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * User: ebanks - * Date: Apr 23, 2009 - * Time: 10:26:03 AM - * To change this template use File | Settings | File Templates. - */ -public class TraverseByLocusWindows extends TraversalEngine { - - private IndexedFastaSequenceFile sequenceFile = null; - - public TraverseByLocusWindows(List reads, File ref, List> rods) { - super(reads, ref, rods); - } - - @Override - public T traverse(Walker walker, List locations) { - if ( walker instanceof LocusWindowWalker ) { - LocusWindowWalker locusWindowWalker = (LocusWindowWalker)walker; - T sum = traverseByIntervals(locusWindowWalker, locations); - return sum; - } else { - throw new IllegalArgumentException("Walker isn't an interval walker!"); - } - } - - /** - * Traverse by intervals -- the key driver of linearly ordered traversal of intervals. Provides reads, RODs, and - * the reference base for each interval in the reference to the intervalWalker walker. Supports all of the - * interaction contract implied by the interval walker - * - * @param walker An interval walker object - * @param MapType -- the result of calling map() on walker - * @param ReduceType -- the result of calling reduce() on the walker - * @return 0 on success - */ - protected T traverseByIntervals(LocusWindowWalker walker, List locations) { - logger.debug("Entering traverseByIntervals"); - - if(readsFiles.size() > 1) - throw new UnsupportedOperationException("Cannot do ByInterval traversal on file with multiple inputs."); - - try { - sequenceFile = new IndexedFastaSequenceFile(GenomeAnalysisEngine.instance.getArguments().referenceFile); - } - catch ( FileNotFoundException ex ) { - throw new StingException("No ref!",ex); - } - - samReader = initializeSAMFile(readsFiles.get(0)); - - verifySortOrder(true); - - walker.initialize(); - - T sum = walker.reduceInit(); - - if ( locations.isEmpty() ) { - logger.debug("There are no intervals provided for the traversal"); - } else { - if ( !samReader.hasIndex() ) - Utils.scareUser("Processing locations were requested, but no index was found for the input SAM/BAM file. This operation is potentially dangerously slow, aborting."); - - sum = intervalTraversal(walker, locations, sum); - } - - //printOnTraversalDone("intervals", sum); - walker.onTraversalDone(sum); - return sum; - } - - protected T intervalTraversal(LocusWindowWalker walker, List locations, T sum) { - for ( GenomeLoc interval : locations ) { - logger.debug(String.format("Processing interval %s", interval.toString())); - - CloseableIterator readIter = getIteratorOverDesiredRegion( samReader, interval ); - Iterator wrappedIter = wrapReadsIterator(readIter, false); - LocusContext locus = getLocusContext(wrappedIter, interval); - readIter.close(); - - sum = carryWalkerOverInterval(walker, sum, locus); - } - return sum; - } - - private LocusContext getLocusContext(Iterator readIter, GenomeLoc interval) { - ArrayList reads = new ArrayList(); - boolean done = false; - long leftmostIndex = interval.getStart(), - rightmostIndex = interval.getStop(); - while (readIter.hasNext() && !done) { - TraversalStatistics.nRecords++; - - SAMRecord read = readIter.next(); - reads.add(read); - if ( read.getAlignmentStart() < leftmostIndex ) - leftmostIndex = read.getAlignmentStart(); - if ( read.getAlignmentEnd() > rightmostIndex ) - rightmostIndex = read.getAlignmentEnd(); - if ( this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads ) { - logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords)); - done = true; - } - } - - GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex); - LocusContext locus = new LocusContext(window, reads, null); - if ( DOWNSAMPLE_BY_COVERAGE ) - locus.downsampleToCoverage(downsamplingCoverage); - - return locus; - } - - protected T carryWalkerOverInterval(LocusWindowWalker walker, T sum, LocusContext window) { - - int contigLength = getSAMHeader().getSequence(window.getLocation().getContig()).getSequenceLength(); - String refSuffix = ""; - if ( window.getLocation().getStop() > contigLength ) { - refSuffix = Utils.dupString('x', (int)window.getLocation().getStop() - contigLength); - window.setLocation(GenomeLocParser.setStop(window.getLocation(),contigLength)); - } - - StringBuffer refBases = new StringBuffer(new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases())); - refBases.append(refSuffix); - - // Iterate forward to get all reference ordered data covering this interval - final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(window.getLocation()); - - sum = walkAtinterval( walker, sum, window, refBases.toString(), tracker ); - - printProgress("intervals", window.getLocation()); - - return sum; - } - - protected T walkAtinterval( final LocusWindowWalker walker, - T sum, - final LocusContext locus, - final String refSeq, - final RefMetaDataTracker tracker ) { - - // - // Execute our contract with the walker. Call filter, map, and reduce - // - final boolean keepMeP = walker.filter(tracker, refSeq, locus); - if (keepMeP) { - M x = walker.map(tracker, refSeq, locus); - sum = walker.reduce(x, sum); - } - - //printProgress("intervals", interval.getLocation()); - return sum; - } - - /** - * Gets an iterator over the specified region. Uses a special iterator that dynamically adds a header to all - * read information. - * @param reader SAMFileReader to query. - * @param region Region to use. - * @return An iterator over the desired region. - */ - private CloseableIterator getIteratorOverDesiredRegion( SAMFileReader reader, GenomeLoc region ) { - SamFileHeaderMerger headerMerger = new SamFileHeaderMerger( Collections.singletonList(reader), SAMFileHeader.SortOrder.coordinate ); - MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2( headerMerger, new Reads(readsFiles) ); - if( region != null ) - iterator.queryOverlapping( region.getContig(), (int)region.getStart(), (int)region.getStop() ); - return iterator; - } -}