ReadShard are no longer allowed to span multiple contigs
-- Previous behavior was unnecessary and causes all sorts of problems with RODs for reads. The old implementation simply failed in this case. The new code handles this correctly by forcing shards to have all of their data on a single contig. -- Added a PrintReads integration test to ensure this behavior is correct -- Adding test BAMs that have < 200 reads and span across contig boundaries
This commit is contained in:
parent
a3f443c1cc
commit
ce3d1f89ea
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -140,6 +141,9 @@ public class ReadShard extends Shard {
|
|||
String contig = null;
|
||||
|
||||
for ( final SAMRecord read : reads ) {
|
||||
if ( contig != null && ! read.getReferenceName().equals(contig) )
|
||||
throw new ReviewedStingException("ReadShard contains reads spanning contig boundaries, which is no longer allowed. "
|
||||
+ "First contig is " + contig + " next read was " + read.getReferenceName() );
|
||||
contig = read.getReferenceName();
|
||||
if ( read.getAlignmentStart() < start ) start = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > stop ) stop = read.getAlignmentEnd();
|
||||
|
|
|
|||
|
|
@ -486,9 +486,15 @@ public class SAMDataSource {
|
|||
|
||||
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
|
||||
while(!shard.isBufferFull() && iterator.hasNext()) {
|
||||
read = iterator.next();
|
||||
shard.addRead(read);
|
||||
noteFilePositionUpdate(positionUpdates,read);
|
||||
final SAMRecord nextRead = iterator.next();
|
||||
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
|
||||
// only add reads to the shard if they are on the same contig
|
||||
read = nextRead;
|
||||
shard.addRead(read);
|
||||
noteFilePositionUpdate(positionUpdates,read);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the reads are sorted in queryname order, ensure that all reads
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@ public class PrintReadsIntegrationTest extends WalkerTest {
|
|||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e920b8505e7e95d67634b0905237dbc")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "13bb9a91b1d4dd2425f73302b8a1ac1c")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e920b8505e7e95d67634b0905237dbc")},
|
||||
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")}
|
||||
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")},
|
||||
{new PRTest(b37KGReference, "NA12878.1_10mb_2_10mb.bam", "", "c43380ac39b98853af457b90e52f8427")}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue