Kill the TraverseByLocusWindows traversal. TraverseLocusWindows will take its place.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1138 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
74e9bb46b4
commit
a3e0ec20c4
|
|
@ -140,10 +140,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||
public Boolean disableThreading = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "use_new_tblw", shortName= "tblw", doc="Use the legacy traverse by locus window.", required = false)
|
||||
public Boolean useNewTraverseByLocusWindow = false;
|
||||
|
||||
/** How many threads should be allocated to this analysis. */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
|
|
|
|||
|
|
@ -116,14 +116,7 @@ public class GenomeAnalysisEngine {
|
|||
// our microscheduler, which is in charge of running everything
|
||||
MicroScheduler microScheduler = null;
|
||||
|
||||
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
||||
// branching based on the input data, but this should disapear when all the traversals are switched over
|
||||
if (my_walker instanceof LocusWindowWalker && !args.useNewTraverseByLocusWindow) {
|
||||
legacyTraversal(my_walker, rods);
|
||||
return;
|
||||
} else { // we have an old style traversal, once we're done return
|
||||
microScheduler = createMicroscheduler(my_walker, rods);
|
||||
}
|
||||
microScheduler = createMicroscheduler(my_walker, rods);
|
||||
|
||||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
if (argCollection.referenceFile != null) {
|
||||
|
|
@ -150,38 +143,6 @@ public class GenomeAnalysisEngine {
|
|||
walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
|
||||
}
|
||||
|
||||
/**
|
||||
* this is to accomdate the older style traversals, that haven't been converted over to the new system. Putting them
|
||||
* into their own function allows us to deviate in the two behaviors so the new style traversals aren't limited to what
|
||||
* the old style does. As traversals are converted, this function should disappear.
|
||||
*
|
||||
* @param my_walker the walker to run
|
||||
* @param rods the rod tracks to associate with
|
||||
*/
|
||||
private void legacyTraversal(Walker my_walker, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
if (my_walker instanceof LocusWindowWalker) {
|
||||
this.engine = new TraverseByLocusWindows(argCollection.samFiles, argCollection.referenceFile, rods);
|
||||
} else {
|
||||
throw new RuntimeException("Unexpected walker type: " + my_walker);
|
||||
}
|
||||
|
||||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
if (argCollection.referenceFile != null) {
|
||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
|
||||
GenomeLocParser.setupRefContigOrdering(refFile);
|
||||
}
|
||||
|
||||
// Determine the validation stringency. Default to ValidationStringency.STRICT.
|
||||
ValidationStringency strictness = getValidationStringency();
|
||||
|
||||
logger.info("Strictness is " + strictness);
|
||||
genericEngineSetup();
|
||||
|
||||
// store the results of the walker run
|
||||
walkerReturn = engine.traverse(my_walker);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* setup a microscheduler
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,198 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: ebanks
|
||||
* Date: Apr 23, 2009
|
||||
* Time: 10:26:03 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class TraverseByLocusWindows extends TraversalEngine {
|
||||
|
||||
private IndexedFastaSequenceFile sequenceFile = null;
|
||||
|
||||
public TraverseByLocusWindows(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
super(reads, ref, rods);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <M,T> T traverse(Walker<M,T> walker, List<GenomeLoc> locations) {
|
||||
if ( walker instanceof LocusWindowWalker ) {
|
||||
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
|
||||
T sum = traverseByIntervals(locusWindowWalker, locations);
|
||||
return sum;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Walker isn't an interval walker!");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse by intervals -- the key driver of linearly ordered traversal of intervals. Provides reads, RODs, and
|
||||
* the reference base for each interval in the reference to the intervalWalker walker. Supports all of the
|
||||
* interaction contract implied by the interval walker
|
||||
*
|
||||
* @param walker An interval walker object
|
||||
* @param <M> MapType -- the result of calling map() on walker
|
||||
* @param <T> ReduceType -- the result of calling reduce() on the walker
|
||||
* @return 0 on success
|
||||
*/
|
||||
protected <M, T> T traverseByIntervals(LocusWindowWalker<M, T> walker, List<GenomeLoc> locations) {
|
||||
logger.debug("Entering traverseByIntervals");
|
||||
|
||||
if(readsFiles.size() > 1)
|
||||
throw new UnsupportedOperationException("Cannot do ByInterval traversal on file with multiple inputs.");
|
||||
|
||||
try {
|
||||
sequenceFile = new IndexedFastaSequenceFile(GenomeAnalysisEngine.instance.getArguments().referenceFile);
|
||||
}
|
||||
catch ( FileNotFoundException ex ) {
|
||||
throw new StingException("No ref!",ex);
|
||||
}
|
||||
|
||||
samReader = initializeSAMFile(readsFiles.get(0));
|
||||
|
||||
verifySortOrder(true);
|
||||
|
||||
walker.initialize();
|
||||
|
||||
T sum = walker.reduceInit();
|
||||
|
||||
if ( locations.isEmpty() ) {
|
||||
logger.debug("There are no intervals provided for the traversal");
|
||||
} else {
|
||||
if ( !samReader.hasIndex() )
|
||||
Utils.scareUser("Processing locations were requested, but no index was found for the input SAM/BAM file. This operation is potentially dangerously slow, aborting.");
|
||||
|
||||
sum = intervalTraversal(walker, locations, sum);
|
||||
}
|
||||
|
||||
//printOnTraversalDone("intervals", sum);
|
||||
walker.onTraversalDone(sum);
|
||||
return sum;
|
||||
}
|
||||
|
||||
protected <M, T> T intervalTraversal(LocusWindowWalker<M, T> walker, List<GenomeLoc> locations, T sum) {
|
||||
for ( GenomeLoc interval : locations ) {
|
||||
logger.debug(String.format("Processing interval %s", interval.toString()));
|
||||
|
||||
CloseableIterator<SAMRecord> readIter = getIteratorOverDesiredRegion( samReader, interval );
|
||||
Iterator<SAMRecord> wrappedIter = wrapReadsIterator(readIter, false);
|
||||
LocusContext locus = getLocusContext(wrappedIter, interval);
|
||||
readIter.close();
|
||||
|
||||
sum = carryWalkerOverInterval(walker, sum, locus);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
private LocusContext getLocusContext(Iterator<SAMRecord> readIter, GenomeLoc interval) {
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
boolean done = false;
|
||||
long leftmostIndex = interval.getStart(),
|
||||
rightmostIndex = interval.getStop();
|
||||
while (readIter.hasNext() && !done) {
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
SAMRecord read = readIter.next();
|
||||
reads.add(read);
|
||||
if ( read.getAlignmentStart() < leftmostIndex )
|
||||
leftmostIndex = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > rightmostIndex )
|
||||
rightmostIndex = read.getAlignmentEnd();
|
||||
if ( this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads ) {
|
||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
|
||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
LocusContext locus = new LocusContext(window, reads, null);
|
||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||
locus.downsampleToCoverage(downsamplingCoverage);
|
||||
|
||||
return locus;
|
||||
}
|
||||
|
||||
protected <M, T> T carryWalkerOverInterval(LocusWindowWalker<M, T> walker, T sum, LocusContext window) {
|
||||
|
||||
int contigLength = getSAMHeader().getSequence(window.getLocation().getContig()).getSequenceLength();
|
||||
String refSuffix = "";
|
||||
if ( window.getLocation().getStop() > contigLength ) {
|
||||
refSuffix = Utils.dupString('x', (int)window.getLocation().getStop() - contigLength);
|
||||
window.setLocation(GenomeLocParser.setStop(window.getLocation(),contigLength));
|
||||
}
|
||||
|
||||
StringBuffer refBases = new StringBuffer(new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases()));
|
||||
refBases.append(refSuffix);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this interval
|
||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(window.getLocation());
|
||||
|
||||
sum = walkAtinterval( walker, sum, window, refBases.toString(), tracker );
|
||||
|
||||
printProgress("intervals", window.getLocation());
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
protected <M, T> T walkAtinterval( final LocusWindowWalker<M, T> walker,
|
||||
T sum,
|
||||
final LocusContext locus,
|
||||
final String refSeq,
|
||||
final RefMetaDataTracker tracker ) {
|
||||
|
||||
//
|
||||
// Execute our contract with the walker. Call filter, map, and reduce
|
||||
//
|
||||
final boolean keepMeP = walker.filter(tracker, refSeq, locus);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(tracker, refSeq, locus);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
//printProgress("intervals", interval.getLocation());
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over the specified region. Uses a special iterator that dynamically adds a header to all
|
||||
* read information.
|
||||
* @param reader SAMFileReader to query.
|
||||
* @param region Region to use.
|
||||
* @return An iterator over the desired region.
|
||||
*/
|
||||
private CloseableIterator<SAMRecord> getIteratorOverDesiredRegion( SAMFileReader reader, GenomeLoc region ) {
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger( Collections.singletonList(reader), SAMFileHeader.SortOrder.coordinate );
|
||||
MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2( headerMerger, new Reads(readsFiles) );
|
||||
if( region != null )
|
||||
iterator.queryOverlapping( region.getContig(), (int)region.getStart(), (int)region.getStop() );
|
||||
return iterator;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue