Fixes for shattering, added JUnit test case
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
62ac7366ed
commit
d517245beb
|
|
@ -1,8 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -47,6 +51,9 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
||||||
// do we have another contig?
|
// do we have another contig?
|
||||||
private boolean nextContig = false;
|
private boolean nextContig = false;
|
||||||
|
|
||||||
|
/** our logger, used to capture any log output from this class */
|
||||||
|
private static Logger logger = Logger.getLogger(ShardStrategy.class);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the constructor, taking a seq dictionary to parse out contigs
|
* the constructor, taking a seq dictionary to parse out contigs
|
||||||
|
|
@ -110,7 +117,7 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
||||||
public Shard next() {
|
public Shard next() {
|
||||||
// lets get some background info on the problem
|
// lets get some background info on the problem
|
||||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||||
long proposedSize = nextShardSize();
|
long proposedSize = nextShardSize() - 1;
|
||||||
long nextStart = mLoc.getStop() + 1;
|
long nextStart = mLoc.getStop() + 1;
|
||||||
// can we fit it into the current seq size?
|
// can we fit it into the current seq size?
|
||||||
if (nextStart + proposedSize < length) {
|
if (nextStart + proposedSize < length) {
|
||||||
|
|
@ -120,12 +127,14 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
||||||
}
|
}
|
||||||
// else we can't make it in the current location, we have to stitch one together
|
// else we can't make it in the current location, we have to stitch one together
|
||||||
else {
|
else {
|
||||||
lastGenomeLocSize = nextStart + proposedSize - length;
|
long overflow = nextStart + proposedSize - length;
|
||||||
|
logger.debug("Overflow = " + overflow + " length: " + length);
|
||||||
|
lastGenomeLocSize = lastGenomeLocSize - overflow;
|
||||||
// move to the next contig
|
// move to the next contig
|
||||||
|
// the next sequence should start at the beginning of the next contig
|
||||||
|
Shard ret = Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + lastGenomeLocSize));
|
||||||
jumpContig();
|
jumpContig();
|
||||||
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -133,14 +142,16 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
||||||
/** jump to the next contig */
|
/** jump to the next contig */
|
||||||
private void jumpContig() {
|
private void jumpContig() {
|
||||||
++seqLoc;
|
++seqLoc;
|
||||||
if (dic.getSequences().size() <= seqLoc) {
|
|
||||||
|
if (!(seqLoc < dic.getSequences().size())) {
|
||||||
nextContig = false;
|
nextContig = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
logger.debug("Next contig, name = " + dic.getSequence(seqLoc).getSequenceName());
|
||||||
// the next sequence should start at the begining of the next contig
|
|
||||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0);
|
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -168,4 +179,8 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void main (String[] strs) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -34,6 +35,10 @@ public class ShardStrategyFactory {
|
||||||
LINEAR, EXPONENTIAL
|
LINEAR, EXPONENTIAL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** our logger, used to capture any log output from this class */
|
||||||
|
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get a new shatter strategy
|
* get a new shatter strategy
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,101 @@
|
||||||
|
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
|
import static junit.framework.Assert.fail;
|
||||||
|
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.junit.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* User: aaron
|
||||||
|
* Date: Apr 8, 2009
|
||||||
|
* Time: 11:31:04 AM
|
||||||
|
*
|
||||||
|
* The Broad Institute
|
||||||
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
* This software and its documentation are copyright 2009 by the
|
||||||
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
|
*
|
||||||
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* @version 1.0
|
||||||
|
* @date Apr 8, 2009
|
||||||
|
* <p/>
|
||||||
|
* Class ShardFactoryTest
|
||||||
|
* <p/>
|
||||||
|
* Tests ShardStrategyFactory by iterating a LINEAR shard strategy over a reference sequence dictionary.
|
||||||
|
*/
|
||||||
|
public class ShardStrategyFactoryTest {
|
||||||
|
|
||||||
|
FastaSequenceFile2 seq = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function (because of the @BeforeClass tag) gets called only once ever,
|
||||||
|
* before any tests are run
|
||||||
|
*/
|
||||||
|
@BeforeClass
|
||||||
|
public static void doBeforeAnyTests() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tears down any class-wide test fixtures (because of the @AfterClass tag).
|
||||||
|
* <p/>
|
||||||
|
* Called only once, after all of the test case methods have run.
|
||||||
|
*/
|
||||||
|
@AfterClass
|
||||||
|
public static void doAfterAllTests() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function does the setup of our parser, before each method call.
|
||||||
|
* <p/>
|
||||||
|
* Called before every test case method.
|
||||||
|
*/
|
||||||
|
@Before
|
||||||
|
public void doForEachTest() {
|
||||||
|
seq = new FastaSequenceFile2(new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tears down the test fixture after each call.
|
||||||
|
* <p/>
|
||||||
|
* Called after every test case method.
|
||||||
|
*/
|
||||||
|
@After
|
||||||
|
public void undoForEachTest() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Tests that we can iterate a linear shard strategy over the full genome without an exception being thrown */
|
||||||
|
@Test
|
||||||
|
public void testFullGenomeCycle() {
|
||||||
|
ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
|
||||||
|
int shardCount = 0;
|
||||||
|
try {
|
||||||
|
|
||||||
|
for (Shard s : strategy) {
|
||||||
|
GenomeLoc l = s.getGenomeLoc();
|
||||||
|
//logger.debug("Shard start: " + l.getStart() + " stop " + l.getStop() + " contig " + l.getContig());
|
||||||
|
shardCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check to make sure we got apple shards
|
||||||
|
//logger.debug("shardCount : " + shardCount + " seq size = " + seq.getSequenceDictionary().size());
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail("We Shouldn't of seen an exception! : " + e.getMessage() + "; shard count " + shardCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue