ReadShards are no longer allowed to span multiple contigs

-- The previous behavior was unnecessary and caused all sorts of problems with RODs for reads.  The old implementation simply failed in this case.  The new code handles this correctly by forcing each shard to have all of its data on a single contig.
-- Added a PrintReads integration test to ensure this behavior is correct
-- Added test BAMs that have < 200 reads and span contig boundaries
This commit is contained in:
Mark DePristo 2012-08-29 18:43:02 -04:00
parent a3f443c1cc
commit ce3d1f89ea
3 changed files with 15 additions and 4 deletions

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
@ -140,6 +141,9 @@ public class ReadShard extends Shard {
String contig = null;
for ( final SAMRecord read : reads ) {
if ( contig != null && ! read.getReferenceName().equals(contig) )
throw new ReviewedStingException("ReadShard contains reads spanning contig boundaries, which is no longer allowed. "
+ "First contig is " + contig + " next read was " + read.getReferenceName() );
contig = read.getReferenceName();
if ( read.getAlignmentStart() < start ) start = read.getAlignmentStart();
if ( read.getAlignmentEnd() > stop ) stop = read.getAlignmentEnd();

View File

@ -486,9 +486,15 @@ public class SAMDataSource {
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
while(!shard.isBufferFull() && iterator.hasNext()) {
read = iterator.next();
shard.addRead(read);
noteFilePositionUpdate(positionUpdates,read);
final SAMRecord nextRead = iterator.next();
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
// only add reads to the shard if they are on the same contig
read = nextRead;
shard.addRead(read);
noteFilePositionUpdate(positionUpdates,read);
} else {
break;
}
}
// If the reads are sorted in queryname order, ensure that all reads

View File

@ -38,7 +38,8 @@ public class PrintReadsIntegrationTest extends WalkerTest {
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e920b8505e7e95d67634b0905237dbc")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "13bb9a91b1d4dd2425f73302b8a1ac1c")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e920b8505e7e95d67634b0905237dbc")},
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")}
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")},
{new PRTest(b37KGReference, "NA12878.1_10mb_2_10mb.bam", "", "c43380ac39b98853af457b90e52f8427")}
};
}