From a96ed385df96b889a8e5b564c869b865398c75fc Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 2 Oct 2012 13:43:01 -0400 Subject: [PATCH] ReadShard.getReadsSpan(): handle case where shard contains only unmapped mates Nasty, nasty bug -- if we were extremely unlucky with shard boundaries, we might end up with a shard containing only unmapped mates of mapped reads. In this case, ReadShard.getReadsSpan() would not behave correctly, since the shard as a whole would be marked "mapped" (since it refers to mapped intervals) yet consist only of unmapped mates of mapped reads located within those intervals. --- .../sting/gatk/datasources/reads/ReadShard.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java index 662c7526b..27e666f6f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java @@ -215,19 +215,29 @@ public class ReadShard extends Shard { int start = Integer.MAX_VALUE; int stop = Integer.MIN_VALUE; String contig = null; + boolean foundMapped = false; for ( final SAMRecord read : reads ) { if ( contig != null && ! read.getReferenceName().equals(contig) ) throw new ReviewedStingException("ReadShard contains reads spanning contig boundaries, which is no longer allowed. " + "First contig is " + contig + " next read was " + read.getReferenceName() ); contig = read.getReferenceName(); - if ( read.getAlignmentStart() < start ) start = read.getAlignmentStart(); - if ( read.getAlignmentEnd() > stop ) stop = read.getAlignmentEnd(); + + // Even if this shard as a *whole* is not "unmapped", we can still encounter *individual* unmapped mates + // of mapped reads within this shard's buffer. In fact, if we're very unlucky with shard boundaries, + // this shard might consist *only* of unmapped mates! We need to refrain from using the alignment + // starts/stops of these unmapped mates, and detect the case where the shard has been filled *only* + // with unmapped mates. + if ( ! read.getReadUnmappedFlag() ) { + foundMapped = true; + if ( read.getAlignmentStart() < start ) start = read.getAlignmentStart(); + if ( read.getAlignmentEnd() > stop ) stop = read.getAlignmentEnd(); + } } assert contig != null; - if ( contig.equals("*") ) // all reads are unmapped + if ( ! foundMapped || contig.equals("*") ) // all reads are unmapped return GenomeLoc.UNMAPPED; else return parser.createGenomeLoc(contig, start, stop);