Fixed the SAM by-reads test (SAMByReadsTest) for the new filtering code

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1180 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-07 05:45:50 +00:00
parent 2a86f2f833
commit 033bafe7a1
2 changed files with 49 additions and 83 deletions

View File

@ -153,6 +153,9 @@ public class ArtificialSAMUtils {
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M'))); elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
record.setCigar(new Cigar(elements)); record.setCigar(new Cigar(elements));
record.setProperPairFlag(false); record.setProperPairFlag(false);
if (refIndex == -1) {
record.setReadUmappedFlag(true);
}
return record; return record;
} }
@ -175,6 +178,9 @@ public class ArtificialSAMUtils {
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
rec.setReadBases(bases); rec.setReadBases(bases);
rec.setBaseQualities(qual); rec.setBaseQualities(qual);
if (refIndex == -1) {
rec.setReadUmappedFlag(true);
}
return rec; return rec;
} }

View File

@ -63,16 +63,16 @@ public class SAMByReadsTest extends BaseTest {
// setup the test files // setup the test files
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam")); fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
reads = new Reads(fl); reads = new Reads(fl);
} }
/** Test out that we can shard the file and iterate over every read */ /**
* Test out that we can shard the file and iterate over every read
*/
@Test @Test
public void testToUnmappedReads() { public void testToUnmappedReads() {
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000), ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 1, 50),
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 100, 10, 1000) ); ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 1, 10));
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
try { try {
@ -81,94 +81,69 @@ public class SAMByReadsTest extends BaseTest {
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(reads);
data.setResourcePool(gen); data.setResourcePool(gen);
++iterations;
for (int x = 0; x < 10; x++) { StingSAMIterator ret = data.toUnmappedReads(100);
++iterations; // count the reads we've gotten back
StingSAMIterator ret = data.toUnmappedReads(100); if (ret == null) {
// count the reads we've gotten back fail("On iteration " + iterations + " we were returned a null pointer, after seeing " + unmappedReadsSeen + " reads out of a 1000");
if (ret == null) {
fail("On iteration " + iterations + " we were returned a null pointer, after seeing " + unmappedReadsSeen + " reads out of a 1000");
}
while (ret.hasNext()) {
ret.next();
unmappedReadsSeen++;
}
} }
assertEquals(1000,unmappedReadsSeen); while (ret.hasNext()) {
} ret.next();
unmappedReadsSeen++;
catch (SimpleDataSourceLoadException e) { }
assertEquals(10, unmappedReadsSeen);
} catch (SimpleDataSourceLoadException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException"); fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
} }
} }
/** Test out that we can shard the file and iterate over every read */ /**
* Test out that we can shard the file and iterate over every read
*/
@Test @Test
public void testShardingOfReadsSize14() { public void testShardingOfReadsEvenSplit() {
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000), try {
ArtificialSAMUtils.queryReadIterator(1,10,100,1000) ); Thread.sleep(5000);
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); } catch (InterruptedException e) {
targetReadCount = 14; e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 50),
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10));
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
targetReadCount = 5;
try { try {
int iterations = 0;
int readCount = 0; int readCount = 0;
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(reads);
ArrayList<Integer> readsPerShard = new ArrayList<Integer>();
data.setResourcePool(gen); data.setResourcePool(gen);
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount); shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
while (shardStrategy.hasNext()) { while (shardStrategy.hasNext()) {
int initialReadCount = readCount;
StingSAMIterator ret = data.seek(shardStrategy.next()); StingSAMIterator ret = data.seek(shardStrategy.next());
assertTrue(ret != null); assertTrue(ret != null);
while (ret.hasNext()) { while (ret.hasNext()) {
ret.next(); ret.next();
readCount++; readCount++;
} }
readsPerShard.add(readCount-initialReadCount);
ret.close(); ret.close();
iterations++;
} }
assertEquals(20, readCount);
// assert that we saw 2000 reads } catch (SimpleDataSourceLoadException e) {
assertEquals(2000,readCount);
/**
* this next assertion is based on the following logic:
* 14 reads per shard = 8 shards per each 100 read chromosome
* 10 chromosomes = 8 * 10 = 80
* 1000 unmapped reads / 14 = 72
* 1 iteration at the end to know we're done
* 80 + 72 + 1 = 153
*/
assertEquals(153,iterations);
}
catch (SimpleDataSourceLoadException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException"); fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
} }
} }
/** Test out that we can shard the file and iterate over every read */ /**
* Test out that we can shard the file and iterate over every read
*/
@Test @Test
public void testShardingOfReadsSize25() { public void testShardingOfReadsOddRemainder() {
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000), ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 100),
ArtificialSAMUtils.queryReadIterator(1,10,100,1000) ); ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10));
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
targetReadCount = 25; targetReadCount = 3;
try { try {
int iterations = 0;
int readCount = 0; int readCount = 0;
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(reads);
@ -185,36 +160,21 @@ public class SAMByReadsTest extends BaseTest {
readCount++; readCount++;
} }
ret.close(); ret.close();
iterations++;
} }
// assert that we saw 2000 reads // assert that we saw 2000 reads
assertEquals(2000,readCount); assertEquals(20, readCount);
/** } catch (SimpleDataSourceLoadException e) {
* this next assertion is based on the following logic:
* 25 reads per shard = 5 shards (1 on the end to realize we're done)
* 10 chromosomes = 5 * 10 = 50
* 1000 unmapped reads / 25 = 40
* 1 iteration at the end to know we're done
* 50 + 40 + 1 = 91
*/
assertEquals(91,iterations);
}
catch (SimpleDataSourceLoadException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException"); fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
} }
} }
private SAMFileHeader createArtificialSamHeader(int startingChr, int endingChr, int readCount, int readSize) { private SAMFileHeader createArtificialSamHeader(int startingChr, int endingChr, int readCount, int readSize) {
return ArtificialSAMUtils.createArtificialSamHeader( ( endingChr - startingChr ) + 1, return ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1,
startingChr, startingChr,
readCount + readSize ); readCount + readSize);
} }
} }