Sharding system fixes for corner cases, generally related to lack of coverage in the BAM file.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2928 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-03-04 18:59:21 +00:00
parent a4d494c38b
commit adea38fd5e
3 changed files with 37 additions and 20 deletions

View File

@ -329,8 +329,8 @@
<junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure">
<formatter type="${testOutput}" usefile="${usefile}" />
<classpath>
<path refid="runtime.dependencies"/>
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar"/>
<pathelement location="${test.classes}"/>
<pathelement location="lib/junit-4.4.jar"/>
</classpath>

View File

@ -63,25 +63,35 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
* @param locations List of locations for which to load data.
*/
IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
    // NOTE(review): this span was a diff with its +/- markers stripped, leaving old and
    // new lines interleaved (duplicate declarations, an orphaned else). Reconstructed here
    // as the post-commit version: the branch handling a null data source is the addition.
    if(dataSource != null) {
        // Shard based on reads.
        // TODO: Push this sharding into the data source.
        if(!(dataSource instanceof BlockDrivenSAMDataSource))
            throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");

        List<GenomeLoc> intervals;
        if(locations == null) {
            // If no locations were passed in, shard the entire BAM file.
            SAMFileHeader header = dataSource.getHeader();
            intervals = new ArrayList<GenomeLoc>();
            for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences())
                intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
        }
        else
            intervals = locations.toList();

        this.dataSource = (BlockDrivenSAMDataSource)dataSource;
        filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
    }
    else {
        // No reads to shard against: emit one file pointer per requested interval.
        // (locations is presumably non-null on this path — TODO confirm with callers.)
        this.dataSource = null;
        for(GenomeLoc interval: locations)
            filePointers.add(new FilePointer(interval));
    }
    filePointerIterator = filePointers.iterator();
}
/**
@ -100,7 +110,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
*/
/**
 * Produces the next shard in the sequence.
 * NOTE(review): the stripped diff left two declarations of {@code chunksBounding}
 * (pre- and post-change); the post-commit, null-safe form is kept here.
 */
public IndexDelimitedLocusShard next() {
    FilePointer nextFilePointer = filePointerIterator.next();
    // Without an index-backed data source there are no chunk bounds to resolve.
    Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource!=null ? dataSource.getFilePointersBounding(nextFilePointer.bin) : null;
    return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
}

View File

@ -64,8 +64,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
ReadGroupMapping mapping = new ReadGroupMapping();
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
for(SAMReadGroupRecord readGroup: readGroups)
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
for(SAMReadGroupRecord readGroup: readGroups) {
if(headerMerger.hasReadGroupCollisions())
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
else
mapping.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
}
mergedReadGroupMappings.put(id,mapping);
}
@ -110,7 +114,10 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
for(SAMFileReader reader: readers) {
SAMFileReader2 reader2 = (SAMFileReader2)reader;
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
if(bin != null)
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
else
filePointers.put(reader2,Collections.<Chunk>emptyList());
}
return filePointers;
}