Sharding system fixes for corner cases generally related to lack of coverage
in the BAM file.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2928 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a4d494c38b
commit
adea38fd5e
|
|
@ -329,8 +329,8 @@
|
|||
<junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure">
|
||||
<formatter type="${testOutput}" usefile="${usefile}" />
|
||||
<classpath>
|
||||
<path refid="runtime.dependencies"/>
|
||||
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar"/>
|
||||
<path refid="runtime.dependencies"/>
|
||||
<pathelement location="${test.classes}"/>
|
||||
<pathelement location="lib/junit-4.4.jar"/>
|
||||
</classpath>
|
||||
|
|
|
|||
|
|
@ -63,25 +63,35 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
|||
* @param locations List of locations for which to load data.
|
||||
*/
|
||||
IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
||||
if(!(dataSource instanceof BlockDrivenSAMDataSource))
|
||||
throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");
|
||||
if(dataSource != null) {
|
||||
// Shard based on reads.
|
||||
// TODO: Push this sharding into the data source.
|
||||
if(!(dataSource instanceof BlockDrivenSAMDataSource))
|
||||
throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");
|
||||
|
||||
List<GenomeLoc> intervals;
|
||||
if(locations == null) {
|
||||
// If no locations were passed in, shard the entire BAM file.
|
||||
SAMFileHeader header = dataSource.getHeader();
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> intervals;
|
||||
if(locations == null) {
|
||||
// If no locations were passed in, shard the entire BAM file.
|
||||
SAMFileHeader header = dataSource.getHeader();
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
|
||||
for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences())
|
||||
intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
|
||||
for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences())
|
||||
intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
|
||||
}
|
||||
else
|
||||
intervals = locations.toList();
|
||||
|
||||
|
||||
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
|
||||
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
|
||||
}
|
||||
else {
|
||||
this.dataSource = null;
|
||||
for(GenomeLoc interval: locations)
|
||||
filePointers.add(new FilePointer(interval));
|
||||
}
|
||||
else
|
||||
intervals = locations.toList();
|
||||
|
||||
|
||||
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
|
||||
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
|
||||
filePointerIterator = filePointers.iterator();
|
||||
filePointerIterator = filePointers.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -100,7 +110,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
|||
*/
|
||||
public IndexDelimitedLocusShard next() {
|
||||
FilePointer nextFilePointer = filePointerIterator.next();
|
||||
Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource.getFilePointersBounding(nextFilePointer.bin);
|
||||
Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource!=null ? dataSource.getFilePointersBounding(nextFilePointer.bin) : null;
|
||||
return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,8 +64,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
ReadGroupMapping mapping = new ReadGroupMapping();
|
||||
|
||||
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
||||
for(SAMReadGroupRecord readGroup: readGroups)
|
||||
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
||||
for(SAMReadGroupRecord readGroup: readGroups) {
|
||||
if(headerMerger.hasReadGroupCollisions())
|
||||
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
||||
else
|
||||
mapping.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
||||
}
|
||||
|
||||
mergedReadGroupMappings.put(id,mapping);
|
||||
}
|
||||
|
|
@ -110,7 +114,10 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||
if(bin != null)
|
||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||
else
|
||||
filePointers.put(reader2,Collections.<Chunk>emptyList());
|
||||
}
|
||||
return filePointers;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue