Added some new code for shard support over reads
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@385 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d44c30154a
commit
dd604799dc
|
|
@ -0,0 +1,80 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 6, 2009
|
||||
* Time: 8:23:19 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* <p/>
|
||||
* Class ExpGrowthLocusShardStrategy
|
||||
* <p/>
|
||||
* An exponential strategy: each successive shard size is the base size raised to the next power
|
||||
*/
|
||||
public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
|
||||
|
||||
// fixed size
|
||||
private long baseSize = 100000;
|
||||
private long currentExp = 0;
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
|
||||
super(dic);
|
||||
this.baseSize = startSize;
|
||||
currentExp = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param strat the shatter to convert from
|
||||
*/
|
||||
ExpGrowthLocusShardStrategy(LocusShardStrategy strat) {
|
||||
super(strat);
|
||||
this.baseSize = strat.nextShardSize();
|
||||
currentExp = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the next shards size
|
||||
*
|
||||
* @param size adjust the next size to this
|
||||
*/
|
||||
public void adjustNextShardSize(long size) {
|
||||
baseSize = size;
|
||||
currentExp = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
protected long nextShardSize() {
|
||||
// we grow the exponentially, we just have to make sure we start at zero
|
||||
++currentExp;
|
||||
return (long) Math.floor(Math.pow((double) baseSize, (double) currentExp));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 6, 2009
|
||||
* Time: 7:18:19 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* <p/>
|
||||
* Class LinearLocusShardStrategy
|
||||
* <p/>
|
||||
* allows you to change the sharding length as you traverse
|
||||
*/
|
||||
class LinearLocusShardStrategy extends LocusShardStrategy {
|
||||
|
||||
// default the next size to 100,000
|
||||
private long nextShardSize = 100000;
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
|
||||
super(dic);
|
||||
this.nextShardSize = startSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param strat the shatter to convert from
|
||||
*/
|
||||
LinearLocusShardStrategy(LocusShardStrategy strat) {
|
||||
super(strat);
|
||||
this.nextShardSize = strat.nextShardSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* set the next shards size
|
||||
*
|
||||
* @param size adjust the next size to this
|
||||
*/
|
||||
public void adjustNextShardSize(long size) {
|
||||
nextShardSize = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
protected long nextShardSize() {
|
||||
return nextShardSize;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 7, 2009
|
||||
* Time: 1:19:49 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 7, 2009
|
||||
* <p/>
|
||||
* Class LocusShard
|
||||
* <p/>
|
||||
* This is the locus-based implementation of Shard. Right now it does little more than
|
||||
* wrap GenomeLoc (actually nothing more), but it's good to have the class
|
||||
* in place so it's easier to change guts later.
|
||||
*/
|
||||
public class LocusShard implements Shard {
|
||||
|
||||
// currently our location
|
||||
final GenomeLoc mLoc;
|
||||
|
||||
public LocusShard(GenomeLoc loc) {
|
||||
this.mLoc = loc;
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return mLoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
*
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
public ShardType getShardType() {
|
||||
return ShardType.LOCUS;
|
||||
}
|
||||
|
||||
/**
|
||||
* return a shard representing the passed in GenomeLoc
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static LocusShard toShard(GenomeLoc loc) {
|
||||
return new LocusShard(loc);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,171 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 6, 2009
|
||||
* Time: 11:23:17 AM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* <p/>
|
||||
* Class LocusShardStrategy
|
||||
* <p/>
|
||||
* The abstract base class for locus-based shard strategies, which controls how data is divided
|
||||
*/
|
||||
public abstract class LocusShardStrategy implements ShardStrategy {
|
||||
|
||||
// this stores the seq dictionary, which is a reference for the
|
||||
// lengths and names of contigs, which you need to generate an iterative stratagy
|
||||
protected final SAMSequenceDictionary dic;
|
||||
|
||||
// the current genome location
|
||||
protected GenomeLoc mLoc = null;
|
||||
|
||||
// current seq location
|
||||
protected int seqLoc = 0;
|
||||
|
||||
// the actual last size; this can change based on contig endings
|
||||
protected long lastGenomeLocSize = 0;
|
||||
|
||||
// do we have another contig?
|
||||
private boolean nextContig = false;
|
||||
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
LocusShardStrategy(SAMSequenceDictionary dic) {
|
||||
this.dic = dic;
|
||||
mLoc = new GenomeLoc(dic.getSequence(0).getSequenceName(), 0, 0);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* the copy constructor,
|
||||
*
|
||||
* @param old the old strategy
|
||||
*/
|
||||
LocusShardStrategy(LocusShardStrategy old) {
|
||||
this.dic = old.dic;
|
||||
this.mLoc = old.mLoc;
|
||||
this.seqLoc = old.seqLoc;
|
||||
this.lastGenomeLocSize = old.lastGenomeLocSize;
|
||||
this.nextContig = old.nextContig;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Abstract methods that each strategy has to implement
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* set the next shards size
|
||||
*
|
||||
* @param size adjust the next size to this
|
||||
*/
|
||||
public abstract void adjustNextShardSize(long size);
|
||||
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
abstract long nextShardSize();
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Concrete methods that each strategy does not have to implement
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* get the next shard, based on the return size of nextShardSize
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public LocusShard next() {
|
||||
// lets get some background info on the problem
|
||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||
long proposedSize = nextShardSize();
|
||||
long nextStart = mLoc.getStop() + 1;
|
||||
// can we fit it into the current seq size?
|
||||
if (nextStart + proposedSize < length) {
|
||||
lastGenomeLocSize = proposedSize;
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
|
||||
return LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize));
|
||||
}
|
||||
// else we can't make it in the current location, we have to stitch one together
|
||||
else {
|
||||
lastGenomeLocSize = nextStart + proposedSize - length;
|
||||
|
||||
|
||||
// move to the next contig
|
||||
jumpContig();
|
||||
return LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, lastGenomeLocSize));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** jump to the next contig */
|
||||
private void jumpContig() {
|
||||
++seqLoc;
|
||||
if (dic.getSequences().size() <= seqLoc) {
|
||||
nextContig = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// the next sequence should start at the begining of the next contig
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* is there another GenomeLoc to get?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return nextContig;
|
||||
}
|
||||
|
||||
/** we don't support remove */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* to be for-each(able), we must implement this method
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 10, 2009
|
||||
* Time: 5:03:13 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 10, 2009
|
||||
* <p/>
|
||||
* Class ReadShard
|
||||
* <p/>
|
||||
* A class for sharded reads.
|
||||
*/
|
||||
public class ReadShard implements Shard {
|
||||
|
||||
// the count of the reads we want to copy off
|
||||
int size = 0;
|
||||
|
||||
/**
|
||||
* create a read shard, given a read size
|
||||
* @param size
|
||||
*/
|
||||
public ReadShard(int size) {
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* what kind of shard do we return
|
||||
*
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
public ShardType getShardType() {
|
||||
return ShardType.READ;
|
||||
}
|
||||
}
|
||||
|
|
@ -2,11 +2,12 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.Serializable;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 7, 2009
|
||||
* Time: 1:19:49 PM
|
||||
* Date: Apr 10, 2009
|
||||
* Time: 5:00:27 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
|
|
@ -18,38 +19,26 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 7, 2009
|
||||
* @date Apr 10, 2009
|
||||
* <p/>
|
||||
* Class Shard
|
||||
* Interface Shard
|
||||
* <p/>
|
||||
* This is the base class for shards. Right now it does little more then
|
||||
* wrap GenomeLoc (actually nothing more), but it's good to have the class
|
||||
* in place so it's easier to change guts later.
|
||||
* The base interface for shards.
|
||||
*/
|
||||
public class Shard {
|
||||
|
||||
// currently our location
|
||||
final GenomeLoc mLoc;
|
||||
|
||||
public Shard(GenomeLoc loc) {
|
||||
this.mLoc = loc;
|
||||
public interface Shard extends Serializable {
|
||||
enum ShardType {
|
||||
READ, LOCUS
|
||||
}
|
||||
|
||||
/** @return the genome location represented by this shard */
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return mLoc;
|
||||
}
|
||||
public GenomeLoc getGenomeLoc();
|
||||
|
||||
/**
|
||||
* return a shard representing the passed in GenomeLoc
|
||||
*
|
||||
* @return
|
||||
* what kind of shard do we return
|
||||
* @return ShardType, indicating the type
|
||||
*/
|
||||
public static Shard toShard(GenomeLoc loc) {
|
||||
return new Shard(loc);
|
||||
}
|
||||
public ShardType getShardType();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,17 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 6, 2009
|
||||
* Time: 11:23:17 AM
|
||||
* Date: Apr 10, 2009
|
||||
* Time: 4:55:37 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
|
|
@ -26,239 +20,12 @@ import java.util.List;
|
|||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 6, 2009
|
||||
* @date Apr 10, 2009
|
||||
* <p/>
|
||||
* Interface Shard
|
||||
* Interface ShardStrategy
|
||||
* <p/>
|
||||
* The shard interface, which controls how data is divided
|
||||
* The base interface for the sharding strategy; before we had a base abstract
|
||||
* class, but now this will be an interface to accommodate read based sharding
|
||||
*/
|
||||
public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard> {
|
||||
|
||||
// this stores the seq dictionary, which is a reference for the
|
||||
// lengths and names of contigs, which you need to generate an iterative stratagy
|
||||
protected final SAMSequenceDictionary dic;
|
||||
|
||||
// the current genome location
|
||||
protected GenomeLoc mLoc = null;
|
||||
|
||||
// current seq location
|
||||
protected int seqLoc = 0;
|
||||
|
||||
// the actual last size; this can change based on contig endings
|
||||
protected long lastGenomeLocSize = 0;
|
||||
|
||||
// do we have another contig?
|
||||
private boolean nextContig = false;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(ShardStrategy.class);
|
||||
|
||||
/** our interal list * */
|
||||
private List<GenomeLoc> intervals = null;
|
||||
/** our interal list * */
|
||||
private int currentInterval = -1;
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
*/
|
||||
ShardStrategy(SAMSequenceDictionary dic) {
|
||||
this.dic = dic;
|
||||
mLoc = new GenomeLoc(dic.getSequence(0).getSequenceName(), 0, 0);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* the copy constructor,
|
||||
*
|
||||
* @param old the old strategy
|
||||
*/
|
||||
ShardStrategy(ShardStrategy old) {
|
||||
this.dic = old.dic;
|
||||
this.mLoc = old.mLoc;
|
||||
this.seqLoc = old.seqLoc;
|
||||
this.lastGenomeLocSize = old.lastGenomeLocSize;
|
||||
this.nextContig = old.nextContig;
|
||||
}
|
||||
|
||||
/**
|
||||
* the constructor, taking a seq dictionary to parse out contigs
|
||||
*
|
||||
* @param dic the seq dictionary
|
||||
* @param intervals file
|
||||
*/
|
||||
ShardStrategy(SAMSequenceDictionary dic, List<GenomeLoc> intervals) {
|
||||
this.dic = dic;
|
||||
this.intervals = intervals;
|
||||
this.currentInterval = 0;
|
||||
|
||||
mLoc = new GenomeLoc(intervals.get(0).getContig(), intervals.get(0).getStart() - 1, intervals.get(0).getStart() - 1);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Abstract methods that each strategy has to implement
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* set the next shards size
|
||||
*
|
||||
* @param size adjust the next size to this
|
||||
*/
|
||||
public abstract void adjustNextShardSize(long size);
|
||||
|
||||
|
||||
/**
|
||||
* This is how the various shards strategies implements their approach
|
||||
*
|
||||
* @return the next shard size
|
||||
*/
|
||||
abstract long nextShardSize();
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Concrete methods that each strategy does not have to implement
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* get the next shard, based on the return size of nextShardSize
|
||||
*
|
||||
* @return the next shard
|
||||
*/
|
||||
public Shard next() {
|
||||
|
||||
// lets get some background info on the problem
|
||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||
long proposedSize = nextShardSize();
|
||||
long nextStart = mLoc.getStop() + 1;
|
||||
|
||||
// if we don't have an interval file, use the non interval based approach. Simple, eh?
|
||||
if (this.intervals == null) {
|
||||
return nonIntervaledNext(length, proposedSize, nextStart);
|
||||
} else {
|
||||
return intervaledNext(length, proposedSize, nextStart);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Shard intervaledNext(long length, long proposedSize, long nextStart) {
|
||||
// get the current genome location
|
||||
GenomeLoc loc = intervals.get(currentInterval);
|
||||
if (nextStart + proposedSize > loc.getStop()) {
|
||||
// we need to move the next interval
|
||||
proposedSize = loc.getStop() - nextStart;
|
||||
lastGenomeLocSize = proposedSize;
|
||||
|
||||
// the next sequence should start at the begining of the next contig
|
||||
Shard ret = Shard.toShard(new GenomeLoc(intervals.get(currentInterval).getContig(), nextStart, nextStart + proposedSize - 1));
|
||||
|
||||
++currentInterval;
|
||||
if (intervals.size() > currentInterval) {
|
||||
mLoc = new GenomeLoc(intervals.get(currentInterval).getContig(), intervals.get(currentInterval).getStart() - 1, intervals.get(currentInterval).getStart() - 1);
|
||||
}
|
||||
return ret;// return
|
||||
|
||||
} else {
|
||||
// we need to move the next interval
|
||||
lastGenomeLocSize = proposedSize;
|
||||
|
||||
// the next sequence should start at the begining of the next contig
|
||||
Shard ret = Shard.toShard(new GenomeLoc(intervals.get(currentInterval).getContig(), nextStart, nextStart + proposedSize - 1));
|
||||
|
||||
mLoc = new GenomeLoc(intervals.get(currentInterval).getContig(), nextStart, nextStart + proposedSize - 1);
|
||||
|
||||
return ret;// return
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next shard, if we don't have intervals to traverse over
|
||||
*
|
||||
* @param length the length of the contig
|
||||
* @param proposedSize the proposed size
|
||||
* @param nextStart the next start location
|
||||
* @return the shard to return to the user
|
||||
*/
|
||||
private Shard nonIntervaledNext(long length, long proposedSize, long nextStart) {
|
||||
// can we fit it into the current seq size?
|
||||
if (nextStart + proposedSize - 1 < length) {
|
||||
lastGenomeLocSize = proposedSize;
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize - 1);
|
||||
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize - 1));
|
||||
}
|
||||
// else we can't make it in the current location, we have to stitch one together
|
||||
else {
|
||||
// lets find out the remaining size of the current contig
|
||||
long overflow = nextStart + proposedSize - 1 - length;
|
||||
logger.debug("Overflow = " + overflow + " length: " + length);
|
||||
|
||||
// set our last size counter to the remaining size
|
||||
lastGenomeLocSize = proposedSize - overflow;
|
||||
|
||||
// move to the next contig
|
||||
// the next sequence should start at the begining of the next contig
|
||||
Shard ret = Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + lastGenomeLocSize));
|
||||
|
||||
// now jump ahead to the next contig
|
||||
jumpContig();
|
||||
|
||||
// return the shard
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/** jump to the next contig */
|
||||
private void jumpContig() {
|
||||
++seqLoc;
|
||||
|
||||
if (!(seqLoc < dic.getSequences().size())) {
|
||||
nextContig = false;
|
||||
return;
|
||||
}
|
||||
logger.debug("Next contig, name = " + dic.getSequence(seqLoc).getSequenceName());
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), 0, 0);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* is there another GenomeLoc to get?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
// if we don't have an interval file, use the non interval based approach. Simple, eh?
|
||||
if (this.intervals == null) {
|
||||
return nextContig;
|
||||
} else {
|
||||
return (this.currentInterval < this.intervals.size());
|
||||
}
|
||||
}
|
||||
|
||||
/** we don't support remove */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a shard iterator!");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* to be for-each(able), we must implement this method
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public interface ShardStrategy extends Iterator<Shard>, Iterable<Shard> {
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -35,13 +31,9 @@ import java.util.List;
|
|||
*/
|
||||
public class ShardStrategyFactory {
|
||||
public enum SHATTER_STRATEGY {
|
||||
LINEAR, EXPONENTIAL
|
||||
LINEAR, EXPONENTIAL, READS
|
||||
}
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(ShardStrategyFactory.class);
|
||||
|
||||
|
||||
/**
|
||||
* get a new shatter strategy
|
||||
*
|
||||
|
|
@ -53,29 +45,9 @@ public class ShardStrategyFactory {
|
|||
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
|
||||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearShardStrategy(dic, startingSize);
|
||||
return new LinearLocusShardStrategy(dic, startingSize);
|
||||
case EXPONENTIAL:
|
||||
return new ExpGrowthShardStrategy(dic, startingSize);
|
||||
default:
|
||||
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* get a new shatter strategy
|
||||
*
|
||||
* @param strat what's our strategy - SHATTER_STRATEGY type
|
||||
* @param dic the seq dictionary
|
||||
* @param startingSize the starting size
|
||||
* @return
|
||||
*/
|
||||
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, List<GenomeLoc> lst) {
|
||||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearShardStrategy(dic, startingSize, lst);
|
||||
case EXPONENTIAL:
|
||||
return new ExpGrowthShardStrategy(dic, startingSize, lst);
|
||||
return new ExpGrowthLocusShardStrategy(dic, startingSize);
|
||||
default:
|
||||
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
|
||||
}
|
||||
|
|
@ -89,16 +61,26 @@ public class ShardStrategyFactory {
|
|||
* @param convertFrom convert from this strategy
|
||||
* @return
|
||||
*/
|
||||
static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, ShardStrategy convertFrom) {
|
||||
static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, LocusShardStrategy convertFrom) {
|
||||
switch (strat) {
|
||||
case LINEAR:
|
||||
return new LinearShardStrategy(convertFrom);
|
||||
return new LinearLocusShardStrategy(convertFrom);
|
||||
case EXPONENTIAL:
|
||||
return new ExpGrowthShardStrategy(convertFrom);
|
||||
return new ExpGrowthLocusShardStrategy(convertFrom);
|
||||
default:
|
||||
throw new RuntimeException("Strategy: " + strat + " isn't implemented");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get a shard strategy that shards by read count (not yet implemented: currently always returns null)
|
||||
*
|
||||
* @param readCount the number of reads to include in each shard
|
||||
* @return
|
||||
*/
|
||||
static public ShardStrategy shatterByReadCount(long readCount) {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue