From d517245bebbc19e3d17598d5d75e14caf65c10f1 Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 8 Apr 2009 16:37:34 +0000 Subject: [PATCH] Fixes for shattering, added JUnit test case git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@332 348d0f76-0448-11de-a6fe-93d51630548a --- .../dataSources/shards/ShardStrategy.java | 31 ++++-- .../shards/ShardStrategyFactory.java | 5 + .../shards/ShardStrategyFactoryTest.java | 101 ++++++++++++++++++ 3 files changed, 129 insertions(+), 8 deletions(-) create mode 100755 java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java index 3ba7e46c6..a1ea0e044 100644 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategy.java @@ -1,8 +1,12 @@ package org.broadinstitute.sting.gatk.dataSources.shards; + import net.sf.samtools.SAMSequenceDictionary; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.FastaSequenceFile2; import org.broadinstitute.sting.utils.GenomeLoc; +import java.io.File; import java.util.Iterator; /** * @@ -47,6 +51,9 @@ public abstract class ShardStrategy implements Iterator, Iterable // do we have another contig? 
private boolean nextContig = false; + /** our log, which we want to capture anything from this class */ + private static Logger logger = Logger.getLogger(ShardStrategy.class); + /** * the constructor, taking a seq dictionary to parse out contigs @@ -110,7 +117,7 @@ public abstract class ShardStrategy implements Iterator, Iterable public Shard next() { // lets get some background info on the problem long length = dic.getSequence(seqLoc).getSequenceLength(); - long proposedSize = nextShardSize(); + long proposedSize = nextShardSize() - 1; long nextStart = mLoc.getStop() + 1; // can we fit it into the current seq size? if (nextStart + proposedSize < length) { @@ -120,12 +127,14 @@ public abstract class ShardStrategy implements Iterator, Iterable } // else we can't make it in the current location, we have to stitch one together else { - lastGenomeLocSize = nextStart + proposedSize - length; - - + long overflow = nextStart + proposedSize - length; + logger.debug("Overflow = " + overflow + " length: " + length); + lastGenomeLocSize = lastGenomeLocSize - overflow; /* NOTE(review): subtracts from the previous lastGenomeLocSize, not from proposedSize - confirm the two are equal whenever this branch runs */ // move to the next contig + // the next sequence should start at the beginning of the next contig + Shard ret = Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + lastGenomeLocSize)); jumpContig(); - return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize)); + return ret; } } @@ -133,14 +142,16 @@ public abstract class ShardStrategy implements Iterator, Iterable /** jump to the next contig */ private void jumpContig() { ++seqLoc; - if (dic.getSequences().size() <= seqLoc) { + + if (!(seqLoc < dic.getSequences().size())) { nextContig = false; return; } - - // the next sequence should start at the begining of the next contig + logger.debug("Next contig, name = " + dic.getSequence(seqLoc).getSequenceName()); mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0); + + } /** @@ -168,4 +179,8 @@ public abstract 
class ShardStrategy implements Iterator, Iterable } + public static void main (String[] strs) { + + } + } diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java index ff4a27443..31717205e 100644 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.dataSources.shards; import net.sf.samtools.SAMSequenceDictionary; +import org.apache.log4j.Logger; /** * @@ -34,6 +35,10 @@ public class ShardStrategyFactory { LINEAR, EXPONENTIAL } + /** our log, which we want to capture anything from this class */ + private static Logger logger = Logger.getLogger(ShardStrategyFactory.class); + + /** * get a new shatter strategy * diff --git a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java new file mode 100755 index 000000000..fa15e9773 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java @@ -0,0 +1,101 @@ +package org.broadinstitute.sting.gatk.dataSources.shards; + +import static junit.framework.Assert.fail; +import org.broadinstitute.sting.utils.FastaSequenceFile2; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.junit.*; + +import java.io.File; + +/** + * + * User: aaron + * Date: Apr 8, 2009 + * Time: 11:31:04 AM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. 
Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + * @date Apr 8, 2009 + *

+ * Class ShardStrategyFactoryTest + *

+ * Iterates a LINEAR shard strategy over a reference sequence dictionary and fails if any exception is thrown. + */ +public class ShardStrategyFactoryTest { + + FastaSequenceFile2 seq = null; + + /** + * This function (because of the @BeforeClass tag) gets called only once ever, + * before any tests are run + */ + @BeforeClass + public static void doBeforeAnyTests() { + + } + + /** + * Class-level teardown hook; currently does nothing. + *

+ * Called only once (because of the @AfterClass tag), after all tests have run. + */ + @AfterClass + public static void doAfterAllTests() { + + } + + /** + * This function creates the FastaSequenceFile2 for the reference FASTA, before each method call. + *

+ * Called before every test case method. + */ + @Before + public void doForEachTest() { + seq = new FastaSequenceFile2(new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + } + + /** + * Tears down the test fixture after each call. + *

+ * Called after every test case method. + */ + @After + public void undoForEachTest() { + + } + + /** Tests that a LINEAR strategy over the full sequence dictionary can be iterated shard by shard without an exception */ + @Test + public void testFullGenomeCycle() { + ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000); + int shardCount = 0; + try { + + for (Shard s : strategy) { + GenomeLoc l = s.getGenomeLoc(); + //logger.debug("Shard start: " + l.getStart() + " stop " + l.getStop() + " contig " + l.getContig()); + shardCount++; + } + + // TODO: check that we got the expected number of shards (nothing is asserted on shardCount yet) + //logger.debug("shardCount : " + shardCount + " seq size = " + seq.getSequenceDictionary().size()); + + } catch (Exception e) { + e.printStackTrace(); + fail("We Shouldn't of seen an exception! : " + e.getMessage() + "; shard count " + shardCount); + } + } + +}