Sketch of new version of TraverseByLocusWindow, and a flag to conditionally turn it on.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1097 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-06-25 18:20:56 +00:00
parent 4e04370f14
commit 102b38c055
4 changed files with 115 additions and 1 deletions

View File

@ -140,6 +140,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
public Boolean disableThreading = false;
@Element(required = false)
@Argument(fullName = "use_new_tblw", shortName= "tblw", doc="Use the legacy traverse by locus window.", required = false)
public Boolean useNewTraverseByLocusWindow = false;
/** How many threads should be allocated to this analysis. */
@Element(required = false)
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)

View File

@ -116,7 +116,7 @@ public class GenomeAnalysisEngine {
// if we're a read or a locus walker, we use the new system. Right now we have complicated
// branching based on the input data, but this should disapear when all the traversals are switched over
if (my_walker instanceof LocusWalker || my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) {
if (!(my_walker instanceof LocusWindowWalker) && !args.useNewTraverseByLocusWindow) {
microScheduler = createMicroscheduler(my_walker, rods);
} else { // we have an old style traversal, once we're done return
legacyTraversal(my_walker, rods);

View File

@ -54,6 +54,19 @@ public class LocusReferenceView extends ReferenceView {
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
}
/**
* Allow the user to pull reference info from any arbitrary region of the reference.
* Assume the user has already performed all necessary bounds checking.
* TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality.
* @param genomeLoc The locus.
* @return A list of the bases starting at the start of the locus (inclusive) and ending
* at the end of the locus (inclusive).
*/
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),genomeLoc.getStop());
return StringUtil.bytesToString(subsequence.getBases()).toCharArray();
}
/**
* Validates that the genomeLoc is one base wide and is in the reference sequence.
* @param genomeLoc location to verify.

View File

@ -0,0 +1,97 @@
package org.broadinstitute.sting.gatk.traversals;
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
import java.io.File;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: ebanks
* Date: Apr 23, 2009
* Time: 10:26:03 AM
* To change this template use File | Settings | File Templates.
*/
public class TraverseLocusWindows extends TraversalEngine {
public TraverseLocusWindows(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
super(reads, ref, rods);
}
public <M,T> T traverse( Walker<M,T> walker,
Shard shard,
ShardDataProvider dataProvider,
T sum ) {
if ( !(walker instanceof LocusWindowWalker) )
throw new IllegalArgumentException("Walker isn't a locus window walker!");
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
GenomeLoc interval = shard.getGenomeLoc();
ReadView readView = new ReadView( dataProvider );
LocusReferenceView referenceView = new LocusReferenceView( dataProvider );
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
LocusContext locus = getLocusContext(readView.iterator(), interval);
String referenceSubsequence = new String(referenceView.getReferenceBases(interval));
// Iterate forward to get all reference ordered data covering this interval
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
//
// Execute our contract with the walker. Call filter, map, and reduce
//
final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
if (keepMeP) {
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus);
sum = locusWindowWalker.reduce(x, sum);
}
printProgress("intervals", locus.getLocation());
return sum;
}
private LocusContext getLocusContext(Iterator<SAMRecord> readIter, GenomeLoc interval) {
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
boolean done = false;
long leftmostIndex = interval.getStart(),
rightmostIndex = interval.getStop();
while (readIter.hasNext() && !done) {
TraversalStatistics.nRecords++;
SAMRecord read = readIter.next();
reads.add(read);
if ( read.getAlignmentStart() < leftmostIndex )
leftmostIndex = read.getAlignmentStart();
if ( read.getAlignmentEnd() > rightmostIndex )
rightmostIndex = read.getAlignmentEnd();
if ( this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads ) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
done = true;
}
}
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
LocusContext locus = new LocusContext(window, reads, null);
if ( DOWNSAMPLE_BY_COVERAGE )
locus.downsampleToCoverage(downsamplingCoverage);
return locus;
}
}