2009-04-10 04:28:17 +08:00
|
|
|
package org.broadinstitute.sting.gatk.traversals;
|
|
|
|
|
|
2009-07-10 05:57:00 +08:00
|
|
|
import org.apache.log4j.Logger;
|
2009-05-16 05:02:12 +08:00
|
|
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
2009-11-12 14:18:10 +08:00
|
|
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
2009-08-05 05:01:37 +08:00
|
|
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
2009-07-10 05:57:00 +08:00
|
|
|
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
2009-07-10 05:57:00 +08:00
|
|
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
2009-04-10 04:28:17 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
2009-12-31 05:56:25 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
2010-01-06 20:15:36 +08:00
|
|
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
2010-06-13 09:47:02 +08:00
|
|
|
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
/**
|
2009-05-23 03:12:00 +08:00
|
|
|
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
2009-04-10 04:28:17 +08:00
|
|
|
*/
|
2010-03-12 02:40:31 +08:00
|
|
|
public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
|
2009-11-12 14:18:10 +08:00
|
|
|
final private static String LOCI_STRING = "sites";
|
2009-04-10 04:28:17 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* our log, which we want to capture anything from this class
|
|
|
|
|
*/
|
|
|
|
|
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
|
|
|
|
|
2009-05-09 05:27:54 +08:00
|
|
|
@Override
|
2010-03-12 02:40:31 +08:00
|
|
|
public T traverse( LocusWalker<M,T> walker,
|
|
|
|
|
LocusShardDataProvider dataProvider,
|
|
|
|
|
T sum ) {
|
2010-02-25 08:16:50 +08:00
|
|
|
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-05-23 03:12:00 +08:00
|
|
|
LocusView locusView = getLocusView( walker, dataProvider );
|
2009-05-16 05:02:12 +08:00
|
|
|
|
2009-09-13 03:13:15 +08:00
|
|
|
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-09-13 03:13:15 +08:00
|
|
|
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
|
|
|
|
ReferenceOrderedView referenceOrderedDataView = null;
|
2010-01-29 11:14:26 +08:00
|
|
|
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
2009-09-13 03:13:15 +08:00
|
|
|
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
|
|
|
|
else
|
|
|
|
|
referenceOrderedDataView = (RodLocusView)locusView;
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-09-13 03:13:15 +08:00
|
|
|
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
|
|
|
|
|
|
|
|
|
// We keep processing while the next reference location is within the interval
|
|
|
|
|
while( locusView.hasNext() ) {
|
|
|
|
|
AlignmentContext locus = locusView.next();
|
2009-12-31 05:56:25 +08:00
|
|
|
GenomeLoc location = locus.getLocation();
|
2009-09-13 03:13:15 +08:00
|
|
|
|
|
|
|
|
TraversalStatistics.nRecords++;
|
|
|
|
|
|
2009-12-31 05:56:25 +08:00
|
|
|
if ( locus.hasExtendedEventPileup() ) {
|
|
|
|
|
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
|
|
|
|
// associated with the current site), we need to update the location. The updated location still starts
|
|
|
|
|
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
|
|
|
|
location = GenomeLocParser.setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
2010-01-08 06:37:22 +08:00
|
|
|
|
|
|
|
|
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
|
|
|
|
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
|
|
|
|
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
|
|
|
|
|
// the next call will not do anything to the view:
|
|
|
|
|
referenceView.expandBoundsToAccomodateLoc(location);
|
2009-12-31 05:56:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Iterate forward to get all reference ordered data covering this location
|
2010-01-08 05:30:55 +08:00
|
|
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-12-31 05:56:25 +08:00
|
|
|
// create reference context. Note that if we have a pileup of "extended events", the context will
|
|
|
|
|
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
|
|
|
|
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
2009-09-13 03:13:15 +08:00
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
2009-09-13 03:13:15 +08:00
|
|
|
if (keepMeP) {
|
2010-03-12 02:40:31 +08:00
|
|
|
M x = walker.map(tracker, refContext, locus);
|
|
|
|
|
sum = walker.reduce(x, sum);
|
2009-09-13 03:13:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (this.maximumIterations > 0 && TraversalStatistics.nRecords > this.maximumIterations) {
|
|
|
|
|
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-12 14:18:10 +08:00
|
|
|
printProgress(LOCI_STRING, locus.getLocation());
|
2009-04-10 04:28:17 +08:00
|
|
|
}
|
2009-09-13 03:13:15 +08:00
|
|
|
}
|
2009-04-10 04:28:17 +08:00
|
|
|
|
2009-09-13 03:13:15 +08:00
|
|
|
// We have a final map call to execute here to clean up the skipped based from the
|
|
|
|
|
// last position in the ROD to that in the interval
|
2010-01-29 11:14:26 +08:00
|
|
|
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
|
2009-09-13 03:13:15 +08:00
|
|
|
RodLocusView rodLocusView = (RodLocusView)locusView;
|
|
|
|
|
long nSkipped = rodLocusView.getLastSkippedBases();
|
|
|
|
|
if ( nSkipped > 0 ) {
|
2010-01-06 20:15:36 +08:00
|
|
|
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
2010-06-13 09:47:02 +08:00
|
|
|
AlignmentContext ac = new AlignmentContext(site, new UnifiedReadBackedPileup(site), nSkipped);
|
2010-03-12 02:40:31 +08:00
|
|
|
M x = walker.map(null, null, ac);
|
|
|
|
|
sum = walker.reduce(x, sum);
|
2009-09-13 03:13:15 +08:00
|
|
|
}
|
2009-04-10 04:28:17 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
2009-05-08 22:12:45 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Temporary override of printOnTraversalDone.
|
2009-09-13 03:13:15 +08:00
|
|
|
*
|
2009-05-08 22:12:45 +08:00
|
|
|
* @param sum Result of the computation.
|
|
|
|
|
*/
|
2010-03-12 02:40:31 +08:00
|
|
|
public void printOnTraversalDone( T sum ) {
|
2009-11-12 14:18:10 +08:00
|
|
|
printOnTraversalDone(LOCI_STRING, sum );
|
2009-05-08 22:12:45 +08:00
|
|
|
}
|
2009-05-23 03:12:00 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Gets the best view of loci for this walker given the available data.
|
|
|
|
|
* @param walker walker to interrogate.
|
|
|
|
|
* @param dataProvider Data which which to drive the locus view.
|
2010-03-12 02:40:31 +08:00
|
|
|
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
2009-05-23 03:12:00 +08:00
|
|
|
*/
|
2010-03-12 02:40:31 +08:00
|
|
|
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
|
2009-05-23 03:12:00 +08:00
|
|
|
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
|
|
|
|
if( dataSource == DataSource.READS )
|
|
|
|
|
return new CoveredLocusView(dataProvider);
|
2010-01-16 05:07:49 +08:00
|
|
|
else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
|
2009-05-23 03:12:00 +08:00
|
|
|
return new AllLocusView(dataProvider);
|
2009-09-13 03:13:15 +08:00
|
|
|
else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
|
|
|
|
|
return new RodLocusView(dataProvider);
|
2009-05-23 03:12:00 +08:00
|
|
|
else
|
|
|
|
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
|
|
|
|
}
|
2009-04-25 03:40:21 +08:00
|
|
|
}
|