From 4213e05aebb08b88787110f3908e766d7b78c134 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 6 Jul 2010 04:34:38 +0000 Subject: [PATCH] Fix for sharding ref walkers via monolithic sharding. Introduces the potential bug (for monolithic sharding only) that when traversing by read, map() function will not be called for loci off the end of the reference. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3722 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 2 +- .../datasources/shards/MonolithicShard.java | 19 ++++++++++++++++--- .../shards/MonolithicShardStrategy.java | 6 ++++-- .../sting/gatk/executive/WindowMaker.java | 6 +----- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 4ea246108..7e5cb30a1 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -743,7 +743,7 @@ public class GenomeAnalysisEngine { else throw new StingException("The GATK cannot currently process unindexed BAM files"); - return new MonolithicShardStrategy(shardType); + return new MonolithicShardStrategy(shardType,drivingDataSource.getSequenceDictionary()); } ShardStrategy shardStrategy = null; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java index 1214beeb2..e42a7a3c0 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java @@ -2,9 +2,14 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; 
import java.util.Collections; import java.util.List; +import java.util.ArrayList; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; /** * A single, monolithic shard bridging all available data. @@ -15,16 +20,24 @@ public class MonolithicShard implements Shard { /** * What type of MonolithicShard is this? Read or locus? */ - private ShardType shardType; + private final ShardType shardType; + + /** + * Locations. For the monolithic shard, should be a list of all available contigs in the reference. + */ + private final List<GenomeLoc> locs = new ArrayList<GenomeLoc>(); /** * Creates a new monolithic shard of the given type. * @param shardType Type of the shard. Must be either read or locus; cannot be intervalic. + * @param sequenceDictionary the sequence dictionary from which to derive contig info. */ - public MonolithicShard(ShardType shardType) { + public MonolithicShard(ShardType shardType, SAMSequenceDictionary sequenceDictionary) { if(shardType != ShardType.LOCUS && shardType != ShardType.READ) throw new StingException("Invalid shard type for monolithic shard: " + shardType); this.shardType = shardType; + for(SAMSequenceRecord sequenceRecord: sequenceDictionary.getSequences()) + locs.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); } /** @@ -32,7 +45,7 @@ public class MonolithicShard implements Shard { * @return null. 
*/ public List<GenomeLoc> getGenomeLocs() { - return Collections.emptyList(); + return locs; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java index b90f3d147..022213d37 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java @@ -6,6 +6,8 @@ import org.broadinstitute.sting.utils.StingException; import java.util.Iterator; import java.util.NoSuchElementException; +import net.sf.samtools.SAMSequenceDictionary; + /** * Create a giant shard representing all the data in the input BAM(s). * @@ -22,8 +24,8 @@ public class MonolithicShardStrategy implements ShardStrategy { * Create a new shard strategy for shards of the given type. * @param shardType The shard type. */ - public MonolithicShardStrategy(Shard.ShardType shardType) { - shard = new MonolithicShard(shardType); + public MonolithicShardStrategy(final Shard.ShardType shardType, final SAMSequenceDictionary sequenceDictionary) { + shard = new MonolithicShard(shardType,sequenceDictionary); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 24964d51d..ce2b39fb4 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -122,8 +122,7 @@ public class WindowMaker implements Iterable, I public boolean hasNext() { // locus == null when doing monolithic sharding. - // TODO: Move the monolithic sharding iterator so that we don't have to special case here. 
- return sourceIterator.hasNext() && (locus == null || sourceIterator.peek().getLocation().overlapsP(locus)); + return sourceIterator.hasNext() && sourceIterator.peek().getLocation().overlapsP(locus); } public AlignmentContext next() { @@ -137,9 +136,6 @@ public class WindowMaker implements Iterable, I public void seedNextLocus() { // locus == null when doing monolithic sharding. - // TODO: Move the monolithic sharding iterator so that we don't have to special case here. - if(locus == null) return; - while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus)) sourceIterator.next(); }