Sharding system fixes for corner cases, generally related to lack of coverage in the BAM file.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2928 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-03-04 18:59:21 +00:00
parent a4d494c38b
commit adea38fd5e
3 changed files with 37 additions and 20 deletions

View File

@ -329,8 +329,8 @@
<junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure"> <junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure">
<formatter type="${testOutput}" usefile="${usefile}" /> <formatter type="${testOutput}" usefile="${usefile}" />
<classpath> <classpath>
<path refid="runtime.dependencies"/>
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar"/> <pathelement location="${dist.dir}/GenomeAnalysisTK.jar"/>
<path refid="runtime.dependencies"/>
<pathelement location="${test.classes}"/> <pathelement location="${test.classes}"/>
<pathelement location="lib/junit-4.4.jar"/> <pathelement location="lib/junit-4.4.jar"/>
</classpath> </classpath>

View File

@ -63,25 +63,35 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
* @param locations List of locations for which to load data. * @param locations List of locations for which to load data.
*/ */
IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) { IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
if(!(dataSource instanceof BlockDrivenSAMDataSource)) if(dataSource != null) {
throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source."); // Shard based on reads.
// TODO: Push this sharding into the data source.
if(!(dataSource instanceof BlockDrivenSAMDataSource))
throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");
List<GenomeLoc> intervals; List<GenomeLoc> intervals;
if(locations == null) { if(locations == null) {
// If no locations were passed in, shard the entire BAM file. // If no locations were passed in, shard the entire BAM file.
SAMFileHeader header = dataSource.getHeader(); SAMFileHeader header = dataSource.getHeader();
intervals = new ArrayList<GenomeLoc>(); intervals = new ArrayList<GenomeLoc>();
for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences()) for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences())
intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
}
else
intervals = locations.toList();
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
}
else {
this.dataSource = null;
for(GenomeLoc interval: locations)
filePointers.add(new FilePointer(interval));
} }
else
intervals = locations.toList();
filePointerIterator = filePointers.iterator();
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
filePointerIterator = filePointers.iterator();
} }
/** /**
@ -100,7 +110,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
*/ */
public IndexDelimitedLocusShard next() { public IndexDelimitedLocusShard next() {
FilePointer nextFilePointer = filePointerIterator.next(); FilePointer nextFilePointer = filePointerIterator.next();
Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource.getFilePointersBounding(nextFilePointer.bin); Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource!=null ? dataSource.getFilePointersBounding(nextFilePointer.bin) : null;
return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL); return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
} }

View File

@ -64,8 +64,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
ReadGroupMapping mapping = new ReadGroupMapping(); ReadGroupMapping mapping = new ReadGroupMapping();
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups(); List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
for(SAMReadGroupRecord readGroup: readGroups) for(SAMReadGroupRecord readGroup: readGroups) {
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId())); if(headerMerger.hasReadGroupCollisions())
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
else
mapping.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
}
mergedReadGroupMappings.put(id,mapping); mergedReadGroupMappings.put(id,mapping);
} }
@ -110,7 +114,10 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>(); Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
for(SAMFileReader reader: readers) { for(SAMFileReader reader: readers) {
SAMFileReader2 reader2 = (SAMFileReader2)reader; SAMFileReader2 reader2 = (SAMFileReader2)reader;
filePointers.put(reader2,reader2.getFilePointersBounding(bin)); if(bin != null)
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
else
filePointers.put(reader2,Collections.<Chunk>emptyList());
} }
return filePointers; return filePointers;
} }