From bfe90af5e210349aa1a4f74f6b209031df17b332 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 14 Jul 2009 01:25:20 +0000 Subject: [PATCH] Some quick and dirty fixes to support querying unmapped BAM files. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1228 348d0f76-0448-11de-a6fe-93d51630548a --- .../datasources/simpleDataSources/SAMDataSource.java | 12 ++++++++++++ .../gatk/iterators/MergingSamRecordIterator2.java | 12 +++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 61781c0c6..f4ce5071c 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -177,6 +177,10 @@ public class SAMDataSource implements SimpleDataSource { private StingSAMIterator seekRead( ReadShard shard ) throws SimpleDataSourceLoadException { StingSAMIterator iter = null; + // If there are no entries in the sequence dictionary, there can't possibly be any mapped reads. Force state to 'unmapped'. + if( isSequenceDictionaryEmpty() ) + intoUnmappedReads = true; + if (!intoUnmappedReads) { if (lastReadPos == null) { lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); @@ -312,6 +316,14 @@ public class SAMDataSource implements SimpleDataSource { return bound; } + /** + * Determines whether the BAM file is completely unsequenced. Requires that the resource pool be initialized. + * @return True if the sequence dictionary is completely empty. False otherwise. 
+ */ + private boolean isSequenceDictionaryEmpty() { + return getHeader().getSequenceDictionary().isEmpty(); + } + /** * Even though the iterator has seeked to the correct location, there may be multiple reads at that location, * and we may have given some of them out already. Move the iterator to the correct location using the readsAtLastPos variable diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index 89e9aee85..931da02c1 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -154,7 +154,17 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } final SAMRecordComparator comparator = getComparator(); for (final SAMFileReader reader : samHeaderMerger.getReaders()) { - Iterator recordIter = reader.queryUnmapped(); + Iterator recordIter = null; + if( reader.hasIndex() ) { + recordIter = reader.queryUnmapped(); + } + else { + // HACK: Supporting completely unmapped BAM files is easy. Let's do a quick check to make sure + // these BAMs aren't partially mapped. + if( reader.getFileHeader().getSequenceDictionary().size() > 0 ) + throw new StingException("Partially mapped BAM files without indices are not supported"); + recordIter = reader.iterator(); + } final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); addIfNotEmpty(iterator); }