Sharding system fixes for corner cases generally related to lack of coverage
in the BAM file. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2928 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a4d494c38b
commit
adea38fd5e
|
|
@ -329,8 +329,8 @@
|
||||||
<junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure">
|
<junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure">
|
||||||
<formatter type="${testOutput}" usefile="${usefile}" />
|
<formatter type="${testOutput}" usefile="${usefile}" />
|
||||||
<classpath>
|
<classpath>
|
||||||
<path refid="runtime.dependencies"/>
|
|
||||||
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar"/>
|
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar"/>
|
||||||
|
<path refid="runtime.dependencies"/>
|
||||||
<pathelement location="${test.classes}"/>
|
<pathelement location="${test.classes}"/>
|
||||||
<pathelement location="lib/junit-4.4.jar"/>
|
<pathelement location="lib/junit-4.4.jar"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
|
||||||
|
|
@ -63,25 +63,35 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
||||||
* @param locations List of locations for which to load data.
|
* @param locations List of locations for which to load data.
|
||||||
*/
|
*/
|
||||||
IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
IndexDelimitedLocusShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
|
||||||
if(!(dataSource instanceof BlockDrivenSAMDataSource))
|
if(dataSource != null) {
|
||||||
throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");
|
// Shard based on reads.
|
||||||
|
// TODO: Push this sharding into the data source.
|
||||||
|
if(!(dataSource instanceof BlockDrivenSAMDataSource))
|
||||||
|
throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");
|
||||||
|
|
||||||
List<GenomeLoc> intervals;
|
List<GenomeLoc> intervals;
|
||||||
if(locations == null) {
|
if(locations == null) {
|
||||||
// If no locations were passed in, shard the entire BAM file.
|
// If no locations were passed in, shard the entire BAM file.
|
||||||
SAMFileHeader header = dataSource.getHeader();
|
SAMFileHeader header = dataSource.getHeader();
|
||||||
intervals = new ArrayList<GenomeLoc>();
|
intervals = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences())
|
for(SAMSequenceRecord sequenceRecord: header.getSequenceDictionary().getSequences())
|
||||||
intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
|
intervals.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
intervals = locations.toList();
|
||||||
|
|
||||||
|
|
||||||
|
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
|
||||||
|
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
this.dataSource = null;
|
||||||
|
for(GenomeLoc interval: locations)
|
||||||
|
filePointers.add(new FilePointer(interval));
|
||||||
}
|
}
|
||||||
else
|
|
||||||
intervals = locations.toList();
|
|
||||||
|
|
||||||
|
filePointerIterator = filePointers.iterator();
|
||||||
this.dataSource = (BlockDrivenSAMDataSource)dataSource;
|
|
||||||
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,intervals,this.dataSource.getNumIndexLevels()-1));
|
|
||||||
filePointerIterator = filePointers.iterator();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -100,7 +110,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
|
||||||
*/
|
*/
|
||||||
public IndexDelimitedLocusShard next() {
|
public IndexDelimitedLocusShard next() {
|
||||||
FilePointer nextFilePointer = filePointerIterator.next();
|
FilePointer nextFilePointer = filePointerIterator.next();
|
||||||
Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource.getFilePointersBounding(nextFilePointer.bin);
|
Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource!=null ? dataSource.getFilePointersBounding(nextFilePointer.bin) : null;
|
||||||
return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
|
return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -64,8 +64,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
||||||
ReadGroupMapping mapping = new ReadGroupMapping();
|
ReadGroupMapping mapping = new ReadGroupMapping();
|
||||||
|
|
||||||
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
||||||
for(SAMReadGroupRecord readGroup: readGroups)
|
for(SAMReadGroupRecord readGroup: readGroups) {
|
||||||
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
if(headerMerger.hasReadGroupCollisions())
|
||||||
|
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
||||||
|
else
|
||||||
|
mapping.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
||||||
|
}
|
||||||
|
|
||||||
mergedReadGroupMappings.put(id,mapping);
|
mergedReadGroupMappings.put(id,mapping);
|
||||||
}
|
}
|
||||||
|
|
@ -110,7 +114,10 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
||||||
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||||
for(SAMFileReader reader: readers) {
|
for(SAMFileReader reader: readers) {
|
||||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
if(bin != null)
|
||||||
|
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||||
|
else
|
||||||
|
filePointers.put(reader2,Collections.<Chunk>emptyList());
|
||||||
}
|
}
|
||||||
return filePointers;
|
return filePointers;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue