From e6fb122d7db987e7d94f3687f2ae753f702d4af2 Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 14 Apr 2009 22:19:36 +0000 Subject: [PATCH] Added some fixes and new iterator tests --This line, and those below, will be ignored-- A gatk/iterators AM gatk/iterators/BoundedReadIteratorTest.java M gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@420 348d0f76-0448-11de-a6fe-93d51630548a --- .../SAMBAMDataSourceTest.java | 7 +- .../iterators/BoundedReadIteratorTest.java | 131 ++++++++++++++++++ 2 files changed, 137 insertions(+), 1 deletion(-) create mode 100755 java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java index 0eecfb234..955aa9363 100755 --- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMBAMDataSourceTest.java @@ -75,7 +75,7 @@ public class SAMBAMDataSourceTest extends BaseTest { /** Test out that we can shard the file and iterate over every read */ - //@Test + @Test public void testLinearBreakIterateAll() { logger.warn("Executing testLinearBreakIterateAll"); // the sharding strat. 
@@ -103,6 +103,11 @@ public class SAMBAMDataSourceTest extends BaseTest { readCount++; } datum.close(); + + // if we're over 100 shards, break out + if (count > 100) { + break; + } } } catch (SimpleDataSourceLoadException e) { diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java new file mode 100755 index 000000000..d613898d1 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorTest.java @@ -0,0 +1,131 @@ +package org.broadinstitute.sting.gatk.iterators; + +import static junit.framework.Assert.fail; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.dataSources.shards.Shard; +import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy; +import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory; +import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMBAMDataSource; +import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +/** + * + * User: aaron + * Date: Apr 14, 2009 + * Time: 5:48:48 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
+ * + */ + + +/** + * @author aaron + * @version 1.0 + * @date Apr 14, 2009 + *

+ * Class BoundedReadIteratorTest + *

+ * Checks that a BoundedReadIterator hands back no more than the requested number of reads.
+ */
+public class BoundedReadIteratorTest extends BaseTest {
+
+    /** the list of BAM file paths given to the data source, and the reference fasta sequence */
+    private List<String> fl;
+    private FastaSequenceFile2 seq;
+
+    /**
+     * Starts each test with an empty file list and a freshly opened reference sequence,
+     * and hands the reference's sequence dictionary to GenomeLoc.setupRefContigOrdering.
+     * Called before every test case method.
+     */
+    @Before
+    public void doForEachTest() {
+        fl = new ArrayList<String>();
+
+        // the reference sequence; seqLocation is not declared here, it comes in through BaseTest
+        seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
+        GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
+    }
+
+
+    /** Test that a BoundedReadIterator stops at its read limit, short of the reads in the whole shard */
+    @Test
+    public void testBounding() {
+        logger.warn("Executing testBounding");
+        // the sharding strategy: LINEAR, built from the reference's sequence dictionary
+        ShardStrategy strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
+
+
+        // setup the test files; doForEachTest() emptied fl, so this is the only
+        // file the data source is given
+        fl.add(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam");
+
+        // the maximum number of reads the bounded iterator is asked to return
+        final long boundedReadCount = 100;
+        long shardReadCount = 0;
+
+        try {
+            SAMBAMDataSource data = new SAMBAMDataSource(fl);
+
+            // make sure we have a shard
+            if (!strat.hasNext()) {
+                fail("Our shatter didn't give us a single piece, this is bad");
+            }
+            Shard sd = strat.next();
+
+            // seek the same shard twice: one iterator to count every read, one to wrap in the bounded iterator
+            MergingSamRecordIterator2 datum = data.seek(sd.getGenomeLoc());
+            MergingSamRecordIterator2 datum2 = data.seek(sd.getGenomeLoc());
+
+            // count every read in the shard
+            for (SAMRecord r : datum) {
+                shardReadCount++;
+
+            }
+
+            // create the bounded iterator
+            BoundedReadIterator iter = new BoundedReadIterator(datum2, boundedReadCount);
+
+            // now see how many reads are in the bounded iterator
+            long readCount = 0;
+            for (SAMRecord r : iter) {
+                readCount++;
+
+            }
+
+            // close the iterators
+            datum.close();
+            datum2.close();
+
+            // the bounded iterator must return exactly the limit, and fewer reads than the full shard
+            assertEquals(boundedReadCount, readCount);
+            assertTrue(readCount < shardReadCount);
+
+
+        }
+        catch (SimpleDataSourceLoadException e) {
+            e.printStackTrace(); // print the full trace; the fail() message below only carries e.getMessage()
+            fail("testBounding: We should not get a SimpleDataSourceLoadException: " + e.getMessage());
+        }
+
+
+    }
+}