Sketch of new version of TraverseByLocusWindow, and a flag to conditionally turn it on.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1097 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e04370f14
commit
102b38c055
|
|
@ -140,6 +140,10 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
@Argument(fullName = "disablethreading", shortName = "dt", doc = "Disable experimental threading support.", required = false)
|
||||||
public Boolean disableThreading = false;
|
public Boolean disableThreading = false;
|
||||||
|
|
||||||
|
@Element(required = false)
|
||||||
|
@Argument(fullName = "use_new_tblw", shortName= "tblw", doc="Use the legacy traverse by locus window.", required = false)
|
||||||
|
public Boolean useNewTraverseByLocusWindow = false;
|
||||||
|
|
||||||
/** How many threads should be allocated to this analysis. */
|
/** How many threads should be allocated to this analysis. */
|
||||||
@Element(required = false)
|
@Element(required = false)
|
||||||
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||||
|
|
|
||||||
|
|
@ -116,7 +116,7 @@ public class GenomeAnalysisEngine {
|
||||||
|
|
||||||
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
// if we're a read or a locus walker, we use the new system. Right now we have complicated
|
||||||
// branching based on the input data, but this should disapear when all the traversals are switched over
|
// branching based on the input data, but this should disapear when all the traversals are switched over
|
||||||
if (my_walker instanceof LocusWalker || my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) {
|
if (!(my_walker instanceof LocusWindowWalker) && !args.useNewTraverseByLocusWindow) {
|
||||||
microScheduler = createMicroscheduler(my_walker, rods);
|
microScheduler = createMicroscheduler(my_walker, rods);
|
||||||
} else { // we have an old style traversal, once we're done return
|
} else { // we have an old style traversal, once we're done return
|
||||||
legacyTraversal(my_walker, rods);
|
legacyTraversal(my_walker, rods);
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,19 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allow the user to pull reference info from any arbitrary region of the reference.
|
||||||
|
* Assume the user has already performed all necessary bounds checking.
|
||||||
|
* TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality.
|
||||||
|
* @param genomeLoc The locus.
|
||||||
|
* @return A list of the bases starting at the start of the locus (inclusive) and ending
|
||||||
|
* at the end of the locus (inclusive).
|
||||||
|
*/
|
||||||
|
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||||
|
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),genomeLoc.getStop());
|
||||||
|
return StringUtil.bytesToString(subsequence.getBases()).toCharArray();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
* Validates that the genomeLoc is one base wide and is in the reference sequence.
|
||||||
* @param genomeLoc location to verify.
|
* @param genomeLoc location to verify.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,97 @@
|
||||||
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: ebanks
|
||||||
|
* Date: Apr 23, 2009
|
||||||
|
* Time: 10:26:03 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class TraverseLocusWindows extends TraversalEngine {
|
||||||
|
|
||||||
|
public TraverseLocusWindows(List<File> reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
|
super(reads, ref, rods);
|
||||||
|
}
|
||||||
|
|
||||||
|
public <M,T> T traverse( Walker<M,T> walker,
|
||||||
|
Shard shard,
|
||||||
|
ShardDataProvider dataProvider,
|
||||||
|
T sum ) {
|
||||||
|
|
||||||
|
if ( !(walker instanceof LocusWindowWalker) )
|
||||||
|
throw new IllegalArgumentException("Walker isn't a locus window walker!");
|
||||||
|
|
||||||
|
LocusWindowWalker<M, T> locusWindowWalker = (LocusWindowWalker<M, T>)walker;
|
||||||
|
|
||||||
|
GenomeLoc interval = shard.getGenomeLoc();
|
||||||
|
|
||||||
|
ReadView readView = new ReadView( dataProvider );
|
||||||
|
LocusReferenceView referenceView = new LocusReferenceView( dataProvider );
|
||||||
|
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||||
|
|
||||||
|
LocusContext locus = getLocusContext(readView.iterator(), interval);
|
||||||
|
|
||||||
|
String referenceSubsequence = new String(referenceView.getReferenceBases(interval));
|
||||||
|
|
||||||
|
// Iterate forward to get all reference ordered data covering this interval
|
||||||
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||||
|
|
||||||
|
//
|
||||||
|
// Execute our contract with the walker. Call filter, map, and reduce
|
||||||
|
//
|
||||||
|
final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
||||||
|
if (keepMeP) {
|
||||||
|
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus);
|
||||||
|
sum = locusWindowWalker.reduce(x, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
printProgress("intervals", locus.getLocation());
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
private LocusContext getLocusContext(Iterator<SAMRecord> readIter, GenomeLoc interval) {
|
||||||
|
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
|
boolean done = false;
|
||||||
|
long leftmostIndex = interval.getStart(),
|
||||||
|
rightmostIndex = interval.getStop();
|
||||||
|
while (readIter.hasNext() && !done) {
|
||||||
|
TraversalStatistics.nRecords++;
|
||||||
|
|
||||||
|
SAMRecord read = readIter.next();
|
||||||
|
reads.add(read);
|
||||||
|
if ( read.getAlignmentStart() < leftmostIndex )
|
||||||
|
leftmostIndex = read.getAlignmentStart();
|
||||||
|
if ( read.getAlignmentEnd() > rightmostIndex )
|
||||||
|
rightmostIndex = read.getAlignmentEnd();
|
||||||
|
if ( this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads ) {
|
||||||
|
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||||
|
LocusContext locus = new LocusContext(window, reads, null);
|
||||||
|
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||||
|
locus.downsampleToCoverage(downsamplingCoverage);
|
||||||
|
|
||||||
|
return locus;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue