Sketch of new version of TraverseByLocusWindow, and a flag to conditionally turn it on.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1097 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e04370f14
commit
102b38c055
|
|
@ -140,6 +140,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||
public Boolean disableThreading = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "use_new_tblw", shortName= "tblw", doc="Use the legacy traverse by locus window.", required = false)
|
||||
public Boolean useNewTraverseByLocusWindow = false;
|
||||
|
||||
/** How many threads should be allocated to this analysis. */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
||||
// branching based on the input data, but this should disapear when all the traversals are switched over
|
||||
if (my_walker instanceof LocusWalker || my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) {
|
||||
if (!(my_walker instanceof LocusWindowWalker) && !args.useNewTraverseByLocusWindow) {
|
||||
microScheduler = createMicroscheduler(my_walker, rods);
|
||||
} else { // we have an old style traversal, once we're done return
|
||||
legacyTraversal(my_walker, rods);
|
||||
|
|
|
|||
|
|
@ -54,6 +54,19 @@ public class LocusReferenceView extends ReferenceView {
|
|||
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow the user to pull reference info from any arbitrary region of the reference.
|
||||
* Assume the user has already performed all necessary bounds checking.
|
||||
* TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality.
|
||||
* @param genomeLoc The locus.
|
||||
* @return A list of the bases starting at the start of the locus (inclusive) and ending
|
||||
* at the end of the locus (inclusive).
|
||||
*/
|
||||
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),genomeLoc.getStop());
|
||||
return StringUtil.bytesToString(subsequence.getBases()).toCharArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
||||
* @param genomeLoc location to verify.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: ebanks
|
||||
* Date: Apr 23, 2009
|
||||
* Time: 10:26:03 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class TraverseLocusWindows extends TraversalEngine {
|
||||
|
||||
public TraverseLocusWindows(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
super(reads, ref, rods);
|
||||
}
|
||||
|
||||
public <M,T> T traverse( Walker<M,T> walker,
|
||||
Shard shard,
|
||||
ShardDataProvider dataProvider,
|
||||
T sum ) {
|
||||
|
||||
if ( !(walker instanceof LocusWindowWalker) )
|
||||
throw new IllegalArgumentException("Walker isn't a locus window walker!");
|
||||
|
||||
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
|
||||
|
||||
GenomeLoc interval = shard.getGenomeLoc();
|
||||
|
||||
ReadView readView = new ReadView( dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
|
||||
LocusContext locus = getLocusContext(readView.iterator(), interval);
|
||||
|
||||
String referenceSubsequence = new String(referenceView.getReferenceBases(interval));
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this interval
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
|
||||
//
|
||||
// Execute our contract with the walker. Call filter, map, and reduce
|
||||
//
|
||||
final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
||||
if (keepMeP) {
|
||||
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus);
|
||||
sum = locusWindowWalker.reduce(x, sum);
|
||||
}
|
||||
|
||||
printProgress("intervals", locus.getLocation());
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
private LocusContext getLocusContext(Iterator<SAMRecord> readIter, GenomeLoc interval) {
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
boolean done = false;
|
||||
long leftmostIndex = interval.getStart(),
|
||||
rightmostIndex = interval.getStop();
|
||||
while (readIter.hasNext() && !done) {
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
SAMRecord read = readIter.next();
|
||||
reads.add(read);
|
||||
if ( read.getAlignmentStart() < leftmostIndex )
|
||||
leftmostIndex = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > rightmostIndex )
|
||||
rightmostIndex = read.getAlignmentEnd();
|
||||
if ( this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads ) {
|
||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
|
||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
LocusContext locus = new LocusContext(window, reads, null);
|
||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||
locus.downsampleToCoverage(downsamplingCoverage);
|
||||
|
||||
return locus;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue