added changes for the readsTraversal to accommodate design changes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@553 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-04-28 19:49:58 +00:00
parent b6874f30cb
commit d4de68e260
4 changed files with 32 additions and 21 deletions

View File

@ -31,22 +31,24 @@ import org.broadinstitute.sting.utils.GenomeLoc;
public class ReadShard implements Shard {
// the count of the reads we want to copy off
int size = 0;
private int size = 0;
// this is going to get gross
private final ReadShardStrategy str;
/**
* Create a read shard of the given size that has no associated
* ReadShardStrategy (the str field is set to null).
*
* @param size the value stored in this shard's size field, i.e. the count of
*             reads we want to copy off
*/
public ReadShard(int size) {
// no strategy is attached when a shard is built through this constructor
this.str = null;
// NOTE(review): the two identical assignments below look like the before/after
// lines of a whitespace-only change in this diff view -- confirm against the file
this.size = size;
this.size = size;
}
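/**
* create a read shard, given the calling read shard strategy and a read size
*
* @param caller the ReadShardStrategy passed in by the caller
* @param size the read size
*/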
ReadShard(ReadShardStrategy caller, int size) {
@ -56,9 +58,15 @@ public class ReadShard implements Shard {
/** @return the genome location represented by this shard */
public GenomeLoc getGenomeLoc() {
return null; //To change body of implemented methods use File | Settings | File Templates.
throw new UnsupportedOperationException("Reads based sharding isn't genome loc aware");
}
/** @return the read count stored in this shard's size field */
public int getSize() {
return size;
}
/**
* what kind of shard do we return
*

View File

@ -36,7 +36,7 @@ public class ReadShardStrategy implements ShardStrategy {
private boolean unMappedReads = true;
// our read bucket size, default
public long readCount = 100000;
protected long readCount = 100000L;
// our sequence dictionary
final private SAMSequenceDictionary dic;
@ -60,7 +60,7 @@ public class ReadShardStrategy implements ShardStrategy {
}
public Shard next() {
return null; //To change body of implemented methods use File | Settings | File Templates.
return new ReadShard((int)readCount); //To change body of implemented methods use File | Settings | File Templates.
}
public void remove() {

View File

@ -56,6 +56,8 @@ public class ShardStrategyFactory {
return new LinearLocusShardStrategy(dic, startingSize);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize);
case READS:
return new ReadShardStrategy(dic, startingSize);
default:
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
}
@ -93,9 +95,12 @@ public class ShardStrategyFactory {
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, List<GenomeLoc> lst) {
switch (strat) {
case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize , lst);
return new LinearLocusShardStrategy(dic, startingSize, lst);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize , lst);
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
case READS:
// return new ReadShardStrategy(dic, startingSize);
throw new RuntimeException("Strategy: " + strat + " isn't implemented for intervals");
default:
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
}
@ -109,7 +114,7 @@ public class ShardStrategyFactory {
* @return
*/
static public ShardStrategy shatterByReadCount(SAMSequenceDictionary dic, long readCount) {
return null;
return new ReadShardStrategy(dic, readCount);
}
}

View File

@ -4,9 +4,9 @@ import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -15,6 +15,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.util.List;
import java.util.Arrays;
/**
*
@ -77,13 +78,14 @@ public class TraverseReads extends TraversalEngine {
BoundedReadIterator iter,
T sum) {
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", shard.getGenomeLoc()));
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard)shard).getSize()));
if (!(walker instanceof ReadWalker))
throw new IllegalArgumentException("Walker isn't a read walker!");
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
GenomeLoc loc = shard.getGenomeLoc();
int readCNT = 0;
// while we still have more reads
for (SAMRecord read: iter) {
@ -91,19 +93,15 @@ public class TraverseReads extends TraversalEngine {
// get the genome loc from the read
GenomeLoc site = new GenomeLoc(read);
// Jump forward in the reference to this locus location
LocusContext locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));
// update the number of reads we've seen
TraversalStatistics.nRecords++;
// Iterate forward to get all reference ordered data covering this locus
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(site);
//ReferenceIterator refSite = referenceProvider.getReferenceSequence(site);
LocusContext locus = locusProvider.getLocusContext(site);
//locus.setReferenceContig(refSite.getCurrentContig());
if (DOWNSAMPLE_BY_COVERAGE)
locus.downsampleToCoverage(downsamplingCoverage);
// we still have to fix the locus context provider to take care of this problem with > 1 length contexts
// LocusContext locus = locusProvider.getLocusContext(site);
final boolean keepMeP = readWalker.filter(locus, read);
if (keepMeP) {
M x = readWalker.map(locus, read);