From d4de68e260a0fa21a890e82e5e98c790f2e89fef Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 28 Apr 2009 19:49:58 +0000 Subject: [PATCH] added changes for the readsTraversal to accommodate design changes git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@553 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/dataSources/shards/ReadShard.java | 14 ++++++++--- .../dataSources/shards/ReadShardStrategy.java | 4 ++-- .../shards/ShardStrategyFactory.java | 11 ++++++--- .../sting/gatk/traversals/TraverseReads.java | 24 +++++++++---------- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShard.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShard.java index f2fba766c..cfb410207 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShard.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShard.java @@ -31,22 +31,24 @@ import org.broadinstitute.sting.utils.GenomeLoc; public class ReadShard implements Shard { // the count of the reads we want to copy off - int size = 0; + private int size = 0; // this is going to get gross private final ReadShardStrategy str; /** * create a read shard, given a read size + * * @param size */ public ReadShard(int size) { this.str = null; - this.size = size; + this.size = size; } /** * create a read shard, given a read size + * * @param size */ ReadShard(ReadShardStrategy caller, int size) { @@ -56,9 +58,15 @@ public class ReadShard implements Shard { /** @return the genome location represented by this shard */ public GenomeLoc getGenomeLoc() { - return null; //To change body of implemented methods use File | Settings | File Templates. 
+ throw new UnsupportedOperationException("Reads based sharding isn't genome loc aware"); } + /** @return the count of reads we want to copy off for this shard */ + public int getSize() { + return size; + } + + /** * what kind of shard do we return * diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java index 9d7d7287a..ba3f99baf 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java @@ -36,7 +36,7 @@ public class ReadShardStrategy implements ShardStrategy { private boolean unMappedReads = true; // our read bucket size, default - public long readCount = 100000; + protected long readCount = 100000L; // our sequence dictionary final private SAMSequenceDictionary dic; @@ -60,7 +60,7 @@ public class ReadShardStrategy implements ShardStrategy { } public Shard next() { - return null; //To change body of implemented methods use File | Settings | File Templates. + return new ReadShard((int)readCount); //To change body of implemented methods use File | Settings | File Templates. 
} public void remove() { diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java index 39af328f0..a5b0d3745 100644 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java @@ -56,6 +56,8 @@ public class ShardStrategyFactory { return new LinearLocusShardStrategy(dic, startingSize); case EXPONENTIAL: return new ExpGrowthLocusShardStrategy(dic, startingSize); + case READS: + return new ReadShardStrategy(dic, startingSize); default: throw new RuntimeException("Strategy: " + strat + " isn't implemented"); } @@ -93,9 +95,12 @@ public class ShardStrategyFactory { static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, List lst) { switch (strat) { case LINEAR: - return new LinearLocusShardStrategy(dic, startingSize , lst); + return new LinearLocusShardStrategy(dic, startingSize, lst); case EXPONENTIAL: - return new ExpGrowthLocusShardStrategy(dic, startingSize , lst); + return new ExpGrowthLocusShardStrategy(dic, startingSize, lst); + case READS: + // return new ReadShardStrategy(dic, startingSize); + throw new RuntimeException("Strategy: " + strat + " isn't implemented for intervals"); default: throw new RuntimeException("Strategy: " + strat + " isn't implemented"); } @@ -109,7 +114,7 @@ public class ShardStrategyFactory { * @return */ static public ShardStrategy shatterByReadCount(SAMSequenceDictionary dic, long readCount) { - return null; + return new ReadShardStrategy(dic, readCount); } } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index e4174939e..3da9db656 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ 
b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -4,9 +4,9 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider; +import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard; import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -15,6 +15,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import java.io.File; import java.util.List; +import java.util.Arrays; /** * @@ -77,13 +78,14 @@ public class TraverseReads extends TraversalEngine { BoundedReadIterator iter, T sum) { - logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", shard.getGenomeLoc())); + logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard)shard).getSize())); if (!(walker instanceof ReadWalker)) throw new IllegalArgumentException("Walker isn't a read walker!"); ReadWalker readWalker = (ReadWalker) walker; - GenomeLoc loc = shard.getGenomeLoc(); + + int readCNT = 0; // while we still have more reads for (SAMRecord read: iter) { @@ -91,19 +93,15 @@ public class TraverseReads extends TraversalEngine { // get the genome loc from the read GenomeLoc site = new GenomeLoc(read); + // Jump forward in the reference to this locus location + LocusContext locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0)); + // update the number of reads we've seen TraversalStatistics.nRecords++; - // Iterate forward to get all reference ordered data covering this locus - final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(site); - 
//ReferenceIterator refSite = referenceProvider.getReferenceSequence(site); - - LocusContext locus = locusProvider.getLocusContext(site); - //locus.setReferenceContig(refSite.getCurrentContig()); - - if (DOWNSAMPLE_BY_COVERAGE) - locus.downsampleToCoverage(downsamplingCoverage); - + // we still have to fix the locus context provider to take care of this problem with > 1 length contexts + // LocusContext locus = locusProvider.getLocusContext(site); + final boolean keepMeP = readWalker.filter(locus, read); if (keepMeP) { M x = readWalker.map(locus, read);