Fixes for shattering, added JUnit test case

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-04-08 16:37:34 +00:00
parent 62ac7366ed
commit d517245beb
3 changed files with 129 additions and 8 deletions

View File

@ -1,8 +1,12 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.util.Iterator;
/**
*
@ -47,6 +51,9 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
// do we have another contig?
private boolean nextContig = false;
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ShardStrategy.class);
/**
* the constructor, taking a seq dictionary to parse out contigs
@ -110,7 +117,7 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
public Shard next() {
// lets get some background info on the problem
long length = dic.getSequence(seqLoc).getSequenceLength();
long proposedSize = nextShardSize();
long proposedSize = nextShardSize() - 1;
long nextStart = mLoc.getStop() + 1;
// can we fit it into the current seq size?
if (nextStart + proposedSize < length) {
@ -120,12 +127,14 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
}
// else we can't make it in the current location, we have to stitch one together
else {
lastGenomeLocSize = nextStart + proposedSize - length;
long overflow = nextStart + proposedSize - length;
logger.debug("Overflow = " + overflow + " length: " + length);
lastGenomeLocSize = lastGenomeLocSize - overflow;
// move to the next contig
// the next sequence should start at the beginning of the next contig
Shard ret = Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + lastGenomeLocSize));
jumpContig();
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
return ret;
}
}
@ -133,14 +142,16 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
/** jump to the next contig */
private void jumpContig() {
++seqLoc;
if (dic.getSequences().size() <= seqLoc) {
if (!(seqLoc < dic.getSequences().size())) {
nextContig = false;
return;
}
// the next sequence should start at the beginning of the next contig
logger.debug("Next contig, name = " + dic.getSequence(seqLoc).getSequenceName());
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0);
}
/**
@ -168,4 +179,8 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
}
public static void main (String[] strs) {
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
/**
*
@ -34,6 +35,10 @@ public class ShardStrategyFactory {
LINEAR, EXPONENTIAL
}
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
/**
* get a new shatter strategy
*

View File

@ -0,0 +1,101 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import static junit.framework.Assert.fail;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.junit.*;
import java.io.File;
/**
*
* User: aaron
* Date: Apr 8, 2009
* Time: 11:31:04 AM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
 * @author aaron
 * @version 1.0
 * @date Apr 8, 2009
 * <p/>
 * Class ShardStrategyFactoryTest
 * <p/>
 * Exercises the shard strategy returned by {@link ShardStrategyFactory#shatter} by
 * iterating it over the sequence dictionary of a reference FASTA file.
 * <p/>
 * NOTE: the reference is loaded from an absolute path (see {@link #doForEachTest()}),
 * so this test can only run on a machine where that file is available.
 */
public class ShardStrategyFactoryTest {

    /** the reference sequence whose dictionary is sharded; re-created before every test */
    FastaSequenceFile2 seq = null;

    /**
     * This function (because of the @BeforeClass tag) gets called only once ever,
     * before any tests are run. Nothing to set up at the moment.
     */
    @BeforeClass
    public static void doBeforeAnyTests() {
    }

    /**
     * This function (because of the @AfterClass tag) gets called only once,
     * after all of the tests in this class have run. Nothing to tear down at the moment.
     */
    @AfterClass
    public static void doAfterAllTests() {
    }

    /**
     * Opens the reference sequence file used by the tests.
     * <p/>
     * Called before every test case method.
     */
    @Before
    public void doForEachTest() {
        seq = new FastaSequenceFile2(new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
    }

    /**
     * Tears down the test fixture after each call.
     * <p/>
     * Called after every test case method. Nothing to tear down at the moment.
     */
    @After
    public void undoForEachTest() {
    }

    /**
     * Walks every shard produced by a LINEAR strategy over the whole sequence dictionary
     * (size argument 100000 -- presumably the shard size; confirm against
     * ShardStrategyFactory.shatter). The test fails if iteration throws or if
     * no shards are produced at all.
     */
    @Test
    public void testFullGenomeCycle() {
        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
        int shardCount = 0;
        try {
            for (Shard s : strategy) {
                // the location itself is not inspected; the call is kept so that an
                // exception thrown while fetching it still fails the test
                s.getGenomeLoc();
                shardCount++;
            }
        } catch (Exception e) {
            // report as a test failure, but keep the original exception attached as the
            // cause so its stack trace shows up in the test report
            AssertionError failure = new AssertionError(
                    "We Shouldn't of seen an exception! : " + e.getMessage() + "; shard count " + shardCount);
            failure.initCause(e);
            throw failure;
        }

        // check to make sure we actually got some shards back
        if (shardCount == 0) {
            fail("Expected at least one shard from the strategy, but got none");
        }
    }
}