2009-04-29 01:55:08 +08:00
|
|
|
package org.broadinstitute.sting.gatk.traversals;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
import org.apache.log4j.Logger;
|
2009-06-19 05:03:57 +08:00
|
|
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
2009-06-12 02:13:22 +08:00
|
|
|
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
|
2009-07-10 05:57:00 +08:00
|
|
|
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
|
|
|
|
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
2009-04-29 01:55:08 +08:00
|
|
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
2009-06-22 22:39:41 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
2009-04-29 01:55:08 +08:00
|
|
|
|
2009-06-09 22:44:27 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2009 The Broad Institute
|
2009-04-29 01:55:08 +08:00
|
|
|
*
|
2009-06-09 22:44:27 +08:00
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
2009-04-29 01:55:08 +08:00
|
|
|
*
|
2009-06-09 22:44:27 +08:00
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
2009-04-29 01:55:08 +08:00
|
|
|
*
|
2009-06-09 22:44:27 +08:00
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
2009-04-29 01:55:08 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @author aaron
|
|
|
|
|
* @version 1.0
|
|
|
|
|
* @date Apr 24, 2009
|
|
|
|
|
* <p/>
|
|
|
|
|
* Class TraverseReads
|
|
|
|
|
* <p/>
|
|
|
|
|
* This class handles traversing by reads in the new shardable style
|
|
|
|
|
*/
|
2009-10-20 08:39:42 +08:00
|
|
|
public class TraverseReads extends TraversalEngine {
|
2009-04-29 01:55:08 +08:00
|
|
|
/** our log, which we want to capture anything from this class */
|
|
|
|
|
protected static Logger logger = Logger.getLogger(TraverseReads.class);
|
|
|
|
|
|
2009-11-12 14:18:10 +08:00
|
|
|
/** descriptor of the type */
|
|
|
|
|
private static final String READS_STRING = "reads";
|
|
|
|
|
|
2009-04-29 01:55:08 +08:00
|
|
|
/**
|
|
|
|
|
* Traverse by reads, given the data and the walker
|
2009-07-10 05:57:00 +08:00
|
|
|
*
|
2009-06-09 22:44:27 +08:00
|
|
|
* @param walker the walker to traverse with
|
|
|
|
|
* @param dataProvider the provider of the reads data
|
|
|
|
|
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
|
|
|
|
|
* @param <M> the map type of the walker
|
|
|
|
|
* @param <T> the reduce type of the walker
|
|
|
|
|
* @return the reduce variable of the read walker
|
2009-04-29 01:55:08 +08:00
|
|
|
*/
|
|
|
|
|
public <M, T> T traverse(Walker<M, T> walker,
|
2009-05-09 05:27:54 +08:00
|
|
|
ShardDataProvider dataProvider,
|
2009-04-29 01:55:08 +08:00
|
|
|
T sum) {
|
|
|
|
|
|
2010-02-25 08:16:50 +08:00
|
|
|
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider));
|
2009-04-29 01:55:08 +08:00
|
|
|
|
|
|
|
|
if (!(walker instanceof ReadWalker))
|
|
|
|
|
throw new IllegalArgumentException("Walker isn't a read walker!");
|
|
|
|
|
|
2009-05-09 05:27:54 +08:00
|
|
|
if( !dataProvider.hasReads() )
|
|
|
|
|
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
|
|
|
|
|
2009-04-29 01:55:08 +08:00
|
|
|
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
|
2009-06-19 05:03:57 +08:00
|
|
|
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
|
2009-07-10 05:57:00 +08:00
|
|
|
|
2009-05-23 03:12:00 +08:00
|
|
|
ReadView reads = new ReadView(dataProvider);
|
|
|
|
|
ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
|
|
|
|
|
2009-04-29 01:55:08 +08:00
|
|
|
// while we still have more reads
|
2009-05-23 03:12:00 +08:00
|
|
|
for (SAMRecord read : reads) {
|
2009-05-11 11:42:38 +08:00
|
|
|
// an array of characters that represent the reference
|
|
|
|
|
char[] refSeq = null;
|
|
|
|
|
|
2009-12-30 05:06:20 +08:00
|
|
|
// get the array of characters for the reference sequence, since we're a mapped read
|
|
|
|
|
if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
|
|
|
|
|
refSeq = reference.getReferenceBases(read);
|
2009-04-29 03:49:58 +08:00
|
|
|
|
2009-04-29 01:55:08 +08:00
|
|
|
// update the number of reads we've seen
|
|
|
|
|
TraversalStatistics.nRecords++;
|
|
|
|
|
|
2009-05-12 06:45:11 +08:00
|
|
|
final boolean keepMeP = readWalker.filter(refSeq, read);
|
2009-04-29 01:55:08 +08:00
|
|
|
if (keepMeP) {
|
2009-05-12 06:45:11 +08:00
|
|
|
M x = readWalker.map(refSeq, read);
|
2009-04-29 01:55:08 +08:00
|
|
|
sum = readWalker.reduce(x, sum);
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-12 14:18:10 +08:00
|
|
|
printProgress(READS_STRING,
|
2009-11-10 13:57:01 +08:00
|
|
|
(read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) ?
|
|
|
|
|
null :
|
|
|
|
|
GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()));
|
2009-04-29 01:55:08 +08:00
|
|
|
}
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-08 22:12:45 +08:00
|
|
|
/**
|
|
|
|
|
* Temporary override of printOnTraversalDone.
|
|
|
|
|
* TODO: Add some sort of TE.getName() function once all TraversalEngines are ported.
|
|
|
|
|
* @param sum Result of the computation.
|
|
|
|
|
* @param <T> Type of the result.
|
|
|
|
|
*/
|
|
|
|
|
public <T> void printOnTraversalDone( T sum ) {
|
2009-11-12 14:18:10 +08:00
|
|
|
printOnTraversalDone(READS_STRING, sum );
|
2009-05-15 00:52:18 +08:00
|
|
|
}
|
2009-04-29 01:55:08 +08:00
|
|
|
}
|