Fixes for shattering, added JUnit test case
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
62ac7366ed
commit
d517245beb
|
|
@ -1,8 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
*
|
||||
|
|
@ -47,6 +51,9 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
|||
// do we have another contig?
|
||||
private boolean nextContig = false;
|
||||
|
||||
/** our logger, used to capture any log output from this class */
|
||||
private static Logger logger = Logger.getLogger(ShardStrategy.class);
|
||||
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
|
|
@ -110,7 +117,7 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
|||
public Shard next() {
|
||||
// let's get some background info on the problem
|
||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||
long proposedSize = nextShardSize();
|
||||
long proposedSize = nextShardSize() - 1;
|
||||
long nextStart = mLoc.getStop() + 1;
|
||||
// can we fit it into the current seq size?
|
||||
if (nextStart + proposedSize < length) {
|
||||
|
|
@ -120,12 +127,14 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
|||
}
|
||||
// else we can't make it in the current location, we have to stitch one together
|
||||
else {
|
||||
lastGenomeLocSize = nextStart + proposedSize - length;
|
||||
|
||||
|
||||
long overflow = nextStart + proposedSize - length;
|
||||
logger.debug("Overflow = " + overflow + " length: " + length);
|
||||
lastGenomeLocSize = lastGenomeLocSize - overflow;
|
||||
// move to the next contig
|
||||
// the next sequence should start at the beginning of the next contig
|
||||
Shard ret = Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + lastGenomeLocSize));
|
||||
jumpContig();
|
||||
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -133,14 +142,16 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
|||
/** jump to the next contig */
|
||||
private void jumpContig() {
|
||||
++seqLoc;
|
||||
if (dic.getSequences().size() <= seqLoc) {
|
||||
|
||||
if (!(seqLoc < dic.getSequences().size())) {
|
||||
nextContig = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// the next sequence should start at the beginning of the next contig
|
||||
logger.debug("Next contig, name = " + dic.getSequence(seqLoc).getSequenceName());
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -168,4 +179,8 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
|||
}
|
||||
|
||||
|
||||
public static void main (String[] strs) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -34,6 +35,10 @@ public class ShardStrategyFactory {
|
|||
LINEAR, EXPONENTIAL
|
||||
}
|
||||
|
||||
/** our logger, used to capture any log output from this class */
|
||||
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
|
||||
|
||||
|
||||
/**
|
||||
* get a new shatter strategy
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,101 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import static junit.framework.Assert.fail;
|
||||
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.junit.*;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 8, 2009
|
||||
* Time: 11:31:04 AM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 8, 2009
|
||||
* <p/>
|
||||
* Class ShardStrategyFactoryTest
|
||||
* <p/>
|
||||
* A description should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class ShardStrategyFactoryTest {
|
||||
|
||||
FastaSequenceFile2 seq = null;
|
||||
|
||||
/**
|
||||
* This function (because of the @BeforeClass tag) gets called only once ever,
|
||||
* before any tests are run
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void doBeforeAnyTests() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Tears down the test fixture once all of the tests have finished.
|
||||
* <p/>
|
||||
* Called only once (because of the @AfterClass tag), after every test case method has run.
|
||||
*/
|
||||
@AfterClass
|
||||
public static void doAfterAllTests() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* This function opens the reference sequence file used by the tests, before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
seq = new FastaSequenceFile2(new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tears down the test fixture after each call.
|
||||
* <p/>
|
||||
* Called after every test case method.
|
||||
*/
|
||||
@After
|
||||
public void undoForEachTest() {
|
||||
|
||||
}
|
||||
|
||||
/** Tests that a LINEAR shard strategy can be iterated over the whole sequence dictionary without throwing an exception */
|
||||
@Test
|
||||
public void testFullGenomeCycle() {
|
||||
ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||
int shardCount = 0;
|
||||
try {
|
||||
|
||||
for (Shard s : strategy) {
|
||||
GenomeLoc l = s.getGenomeLoc();
|
||||
//logger.debug("Shard start: " + l.getStart() + " stop " + l.getStop() + " contig " + l.getContig());
|
||||
shardCount++;
|
||||
}
|
||||
|
||||
// check to make sure we got apple shards
|
||||
//logger.debug("shardCount : " + shardCount + " seq size = " + seq.getSequenceDictionary().size());
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
fail("We Shouldn't of seen an exception! : " + e.getMessage() + "; shard count " + shardCount);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue