Fixed the SAM-by-reads test (SAMByReadsTest) to work with the new filtering code
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1180 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2a86f2f833
commit
033bafe7a1
|
|
@ -153,6 +153,9 @@ public class ArtificialSAMUtils {
|
||||||
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
|
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
|
||||||
record.setCigar(new Cigar(elements));
|
record.setCigar(new Cigar(elements));
|
||||||
record.setProperPairFlag(false);
|
record.setProperPairFlag(false);
|
||||||
|
if (refIndex == -1) {
|
||||||
|
record.setReadUmappedFlag(true);
|
||||||
|
}
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -175,6 +178,9 @@ public class ArtificialSAMUtils {
|
||||||
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
|
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
|
||||||
rec.setReadBases(bases);
|
rec.setReadBases(bases);
|
||||||
rec.setBaseQualities(qual);
|
rec.setBaseQualities(qual);
|
||||||
|
if (refIndex == -1) {
|
||||||
|
rec.setReadUmappedFlag(true);
|
||||||
|
}
|
||||||
return rec;
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,16 +63,16 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
// setup the test files
|
// setup the test files
|
||||||
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
|
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
|
||||||
reads = new Reads(fl);
|
reads = new Reads(fl);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Test out that we can shard the file and iterate over every read */
|
/**
|
||||||
|
* Test out that we can shard the file and iterate over every read
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testToUnmappedReads() {
|
public void testToUnmappedReads() {
|
||||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000),
|
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 1, 50),
|
||||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 100, 10, 1000) );
|
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 1, 10));
|
||||||
|
|
||||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||||
try {
|
try {
|
||||||
|
|
@ -81,8 +81,6 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
|
|
||||||
SAMDataSource data = new SAMDataSource(reads);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
data.setResourcePool(gen);
|
data.setResourcePool(gen);
|
||||||
|
|
||||||
for (int x = 0; x < 10; x++) {
|
|
||||||
++iterations;
|
++iterations;
|
||||||
StingSAMIterator ret = data.toUnmappedReads(100);
|
StingSAMIterator ret = data.toUnmappedReads(100);
|
||||||
// count the reads we've gotten back
|
// count the reads we've gotten back
|
||||||
|
|
@ -93,82 +91,59 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
ret.next();
|
ret.next();
|
||||||
unmappedReadsSeen++;
|
unmappedReadsSeen++;
|
||||||
}
|
}
|
||||||
}
|
assertEquals(10, unmappedReadsSeen);
|
||||||
assertEquals(1000,unmappedReadsSeen);
|
} catch (SimpleDataSourceLoadException e) {
|
||||||
}
|
|
||||||
|
|
||||||
catch (SimpleDataSourceLoadException e) {
|
|
||||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||||
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Test out that we can shard the file and iterate over every read */
|
/**
|
||||||
|
* Test out that we can shard the file and iterate over every read
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testShardingOfReadsSize14() {
|
public void testShardingOfReadsEvenSplit() {
|
||||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000),
|
try {
|
||||||
ArtificialSAMUtils.queryReadIterator(1,10,100,1000) );
|
Thread.sleep(5000);
|
||||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
} catch (InterruptedException e) {
|
||||||
targetReadCount = 14;
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 50),
|
||||||
|
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10));
|
||||||
|
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||||
|
targetReadCount = 5;
|
||||||
try {
|
try {
|
||||||
int iterations = 0;
|
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
SAMDataSource data = new SAMDataSource(reads);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
ArrayList<Integer> readsPerShard = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
data.setResourcePool(gen);
|
data.setResourcePool(gen);
|
||||||
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
|
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount);
|
||||||
while (shardStrategy.hasNext()) {
|
while (shardStrategy.hasNext()) {
|
||||||
int initialReadCount = readCount;
|
|
||||||
|
|
||||||
StingSAMIterator ret = data.seek(shardStrategy.next());
|
StingSAMIterator ret = data.seek(shardStrategy.next());
|
||||||
assertTrue(ret != null);
|
assertTrue(ret != null);
|
||||||
while (ret.hasNext()) {
|
while (ret.hasNext()) {
|
||||||
ret.next();
|
ret.next();
|
||||||
readCount++;
|
readCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
readsPerShard.add(readCount-initialReadCount);
|
|
||||||
|
|
||||||
ret.close();
|
ret.close();
|
||||||
iterations++;
|
|
||||||
}
|
}
|
||||||
|
assertEquals(20, readCount);
|
||||||
// assert that we saw 2000 reads
|
} catch (SimpleDataSourceLoadException e) {
|
||||||
assertEquals(2000,readCount);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* this next assertion is based on the following logic:
|
|
||||||
* 14 reads per shard = 8 shards per each 100 read chromosome
|
|
||||||
* 10 chromosomes = 8 * 10 = 80
|
|
||||||
* 1000 unmapped reads / 14 = 72
|
|
||||||
* 1 iteration at the end to know we're done
|
|
||||||
* 80 + 72 + 1 = 153
|
|
||||||
*/
|
|
||||||
assertEquals(153,iterations);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
catch (SimpleDataSourceLoadException e) {
|
|
||||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||||
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Test out that we can shard the file and iterate over every read */
|
/**
|
||||||
|
* Test out that we can shard the file and iterate over every read
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testShardingOfReadsSize25() {
|
public void testShardingOfReadsOddRemainder() {
|
||||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000),
|
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 100),
|
||||||
ArtificialSAMUtils.queryReadIterator(1,10,100,1000) );
|
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10));
|
||||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||||
targetReadCount = 25;
|
targetReadCount = 3;
|
||||||
try {
|
try {
|
||||||
int iterations = 0;
|
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
SAMDataSource data = new SAMDataSource(reads);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
|
|
@ -185,36 +160,21 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
readCount++;
|
readCount++;
|
||||||
}
|
}
|
||||||
ret.close();
|
ret.close();
|
||||||
iterations++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// assert that we saw 2000 reads
|
// assert that we saw 20 reads
|
||||||
assertEquals(2000,readCount);
|
assertEquals(20, readCount);
|
||||||
|
|
||||||
/**
|
} catch (SimpleDataSourceLoadException e) {
|
||||||
* this next assertion is based on the following logic:
|
|
||||||
* 25 reads per shard = 5 shards (1 on the end to realize we're done)
|
|
||||||
* 10 chromosomes = 5 * 10 = 50
|
|
||||||
* 1000 unmapped reads / 25 = 40
|
|
||||||
* 1 iteration at the end to know we're done
|
|
||||||
* 50 + 40 + 1 = 91
|
|
||||||
*/
|
|
||||||
assertEquals(91,iterations);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
catch (SimpleDataSourceLoadException e) {
|
|
||||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||||
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private SAMFileHeader createArtificialSamHeader(int startingChr, int endingChr, int readCount, int readSize) {
|
private SAMFileHeader createArtificialSamHeader(int startingChr, int endingChr, int readCount, int readSize) {
|
||||||
return ArtificialSAMUtils.createArtificialSamHeader( ( endingChr - startingChr ) + 1,
|
return ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1,
|
||||||
startingChr,
|
startingChr,
|
||||||
readCount + readSize );
|
readCount + readSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue