Moving some of the data sharding around. A new shard category now exists, INTERVAL. This saved a lot of code that was mirroring the same approach in both the read and locus shard strategies.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@840 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
99524ab6d0
commit
3c3cd5bb64
|
|
@ -6,20 +6,29 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
* User: aaron
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* Date: Apr 6, 2009
|
* obtaining a copy of this software and associated documentation
|
||||||
* Time: 8:23:19 PM
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
*
|
*
|
||||||
* The Broad Institute
|
* The above copyright notice and this permission notice shall be
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* included in all copies or substantial portions of the Software.
|
||||||
* This software and its documentation are copyright 2009 by the
|
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
||||||
*
|
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
||||||
*
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -31,17 +30,18 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class IntervalReadShard
|
* Class IntervalShard
|
||||||
* <p/>
|
* <p/>
|
||||||
* This is the read shard that knowns about genomic intervals
|
* the base interval shard. All interval shards are generally the same,
|
||||||
|
* but must return their ShardType individually.
|
||||||
*/
|
*/
|
||||||
public class IntervalReadShard implements Shard {
|
public class IntervalShard implements Shard {
|
||||||
|
|
||||||
/** a collection of genomic locations to interate over */
|
/** a collection of genomic locations to interate over */
|
||||||
private GenomeLoc mSet;
|
private GenomeLoc mSet;
|
||||||
|
|
||||||
IntervalReadShard(GenomeLoc myLocation) {
|
IntervalShard(GenomeLoc myLocation) {
|
||||||
mSet = myLocation.clone();
|
mSet = myLocation.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class IntervalReadShard implements Shard {
|
||||||
*
|
*
|
||||||
* @return READ, indicating the shard type
|
* @return READ, indicating the shard type
|
||||||
*/
|
*/
|
||||||
public ShardType getShardType() {
|
public Shard.ShardType getShardType() {
|
||||||
return Shard.ShardType.READ;
|
return ShardType.INTERVAL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -6,20 +6,29 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
* User: aaron
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* Date: Apr 6, 2009
|
* obtaining a copy of this software and associated documentation
|
||||||
* Time: 7:18:19 PM
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
*
|
*
|
||||||
* The Broad Institute
|
* The above copyright notice and this permission notice shall be
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* included in all copies or substantial portions of the Software.
|
||||||
* This software and its documentation are copyright 2009 by the
|
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
||||||
*
|
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
||||||
*
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,66 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* User: aaron
|
|
||||||
* Date: May 14, 2009
|
|
||||||
* Time: 3:28:50 PM
|
|
||||||
*
|
|
||||||
* The Broad Institute
|
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
|
||||||
* This software and its documentation are copyright 2009 by the
|
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
||||||
*
|
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
* @version 1.0
|
|
||||||
* @date May 14, 2009
|
|
||||||
* <p/>
|
|
||||||
* Class LocusWindowShardStrategy
|
|
||||||
* <p/>
|
|
||||||
* This function knows how to shard on a genome loc boundry. It guarantees
|
|
||||||
* a one-to-one mapping between a GenomeLoc and shard.
|
|
||||||
*/
|
|
||||||
public class LocusIntervalShardStrategy extends LocusShardStrategy {
|
|
||||||
/**
|
|
||||||
* the constructor, taking a seq dictionary to parse out contigs
|
|
||||||
*
|
|
||||||
* @param dic the seq dictionary
|
|
||||||
* @param intervals file
|
|
||||||
*/
|
|
||||||
LocusIntervalShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
|
|
||||||
super(dic, intervals);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is how the various shards strategies implements their approach, adjusting this value
|
|
||||||
*
|
|
||||||
* @return the next shard size
|
|
||||||
*/
|
|
||||||
protected long nextShardSize() {
|
|
||||||
long nextSize = this.getCurrentInterval().getStop() - this.getCurrentInterval().getStart();
|
|
||||||
return nextSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* set the next shards size
|
|
||||||
*
|
|
||||||
* @param size adjust the next size to this
|
|
||||||
*/
|
|
||||||
public void adjustNextShardSize(long size) {
|
|
||||||
//To change body of implemented methods use File | Settings | File Templates.
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -159,11 +158,11 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
||||||
|
|
||||||
if (loc.getStop() - loc.getStart() <= proposedSize) {
|
if (loc.getStop() - loc.getStart() <= proposedSize) {
|
||||||
intervals.removeRegion(loc);
|
intervals.removeRegion(loc);
|
||||||
return new IntervalReadShard(loc);
|
return new IntervalShard(loc);
|
||||||
} else {
|
} else {
|
||||||
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
|
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
|
||||||
intervals.removeRegion(subLoc);
|
intervals.removeRegion(subLoc);
|
||||||
return new IntervalReadShard(subLoc);
|
return new IntervalShard(subLoc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,118 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.dataSources.shards;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* User: aaron
|
|
||||||
* Date: May 21, 2009
|
|
||||||
* Time: 4:13:53 PM
|
|
||||||
*
|
|
||||||
* The Broad Institute
|
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
|
||||||
* This software and its documentation are copyright 2009 by the
|
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
||||||
*
|
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
* <p/>
|
|
||||||
* Class ReadByIntervalShardStrategy
|
|
||||||
* <p/>
|
|
||||||
* Impliments the sharding strategy for reads, given a list
|
|
||||||
* of genomic locations. Shards returned will be bounded by the interval,
|
|
||||||
* but each provided interval may be split into a number of smaller regions.
|
|
||||||
*/
|
|
||||||
public class ReadIntervalShardStrategy implements ShardStrategy {
|
|
||||||
|
|
||||||
/** our storage of the genomic locations they'd like to shard over */
|
|
||||||
private final GenomeLocSortedSet regions;
|
|
||||||
|
|
||||||
/** their prefered size of the shard, we can modify this based on what we see in the shards */
|
|
||||||
private long size;
|
|
||||||
|
|
||||||
/** the sequence dictionary we'll use to lookup the contigs */
|
|
||||||
private final SAMSequenceDictionary dict;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* change the recommended shard size for the next shard we generate. The code will do it's
|
|
||||||
* best to respect this value, but there are no guarantees.
|
|
||||||
*
|
|
||||||
* @param size the next recommended shard size.
|
|
||||||
*/
|
|
||||||
public void adjustNextShardSize(long size) {
|
|
||||||
this.size = size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* the default constructor
|
|
||||||
*
|
|
||||||
* @param dict the sequence dictionary to use
|
|
||||||
* @param size the read count to iterate over
|
|
||||||
*/
|
|
||||||
ReadIntervalShardStrategy(SAMSequenceDictionary dict, long size, GenomeLocSortedSet locations) {
|
|
||||||
if (locations == null || locations.isEmpty()) {
|
|
||||||
throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty.");
|
|
||||||
}
|
|
||||||
this.regions = locations.clone();
|
|
||||||
this.size = size;
|
|
||||||
this.dict = dict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* returns true if there are additional shards
|
|
||||||
* @return false if we're done processing shards
|
|
||||||
*/
|
|
||||||
public boolean hasNext() {
|
|
||||||
return (!regions.isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* gets the next Shard
|
|
||||||
* @return the next shard
|
|
||||||
*/
|
|
||||||
public Shard next() {
|
|
||||||
if ((this.regions == null) || (regions.isEmpty())) {
|
|
||||||
throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty in next() function.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the first region in the list
|
|
||||||
GenomeLoc loc = regions.iterator().next();
|
|
||||||
|
|
||||||
if (loc.getStop() - loc.getStart() <= this.size) {
|
|
||||||
regions.removeRegion(loc);
|
|
||||||
return new IntervalReadShard(loc);
|
|
||||||
} else {
|
|
||||||
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(),loc.getStart(),loc.getStart()+size-1);
|
|
||||||
regions.removeRegion(subLoc);
|
|
||||||
return new IntervalReadShard(subLoc);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* we don't support the remove command
|
|
||||||
*/
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("ShardStrategies don't support remove()");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* makes the ReadIntervalShard iterable, i.e. usable in a for loop.
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Iterator<Shard> iterator() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -4,16 +4,29 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
/**
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
* The Broad Institute
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
* obtaining a copy of this software and associated documentation
|
||||||
* This software and its documentation are copyright 2009 by the
|
* files (the "Software"), to deal in the Software without
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
*
|
*
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* The above copyright notice and this permission notice shall be
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* included in all copies or substantial portions of the Software.
|
||||||
*
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ import java.io.Serializable;
|
||||||
*/
|
*/
|
||||||
public interface Shard extends Serializable {
|
public interface Shard extends Serializable {
|
||||||
enum ShardType {
|
enum ShardType {
|
||||||
READ, LOCUS
|
READ, LOCUS, INTERVAL
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return the genome location represented by this shard */
|
/** @return the genome location represented by this shard */
|
||||||
|
|
|
||||||
|
|
@ -2,12 +2,9 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -81,10 +78,9 @@ public class ShardStrategyFactory {
|
||||||
return new LinearLocusShardStrategy(dic, startingSize, lst);
|
return new LinearLocusShardStrategy(dic, startingSize, lst);
|
||||||
case EXPONENTIAL:
|
case EXPONENTIAL:
|
||||||
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
|
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
|
||||||
case READS:
|
|
||||||
return new ReadIntervalShardStrategy(dic, startingSize, lst);
|
|
||||||
case INTERVAL:
|
case INTERVAL:
|
||||||
return new LocusIntervalShardStrategy(dic, lst);
|
case READS:
|
||||||
|
return new IntervalShardStrategy(startingSize, lst);
|
||||||
default:
|
default:
|
||||||
throw new StingException("Strategy: " + strat + " isn't implemented");
|
throw new StingException("Strategy: " + strat + " isn't implemented");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -148,14 +148,16 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
reads.getDownsamplingFraction(),
|
reads.getDownsamplingFraction(),
|
||||||
reads.getMaxOnTheFlySorts(),
|
reads.getMaxOnTheFlySorts(),
|
||||||
reads.getSafetyChecking());
|
reads.getSafetyChecking());
|
||||||
} else if (shard.getShardType() == Shard.ShardType.LOCUS) {
|
} else if (shard.getShardType() == Shard.ShardType.LOCUS ||
|
||||||
|
shard.getShardType() == Shard.ShardType.INTERVAL) {
|
||||||
iterator = seekLocus(shard.getGenomeLoc());
|
iterator = seekLocus(shard.getGenomeLoc());
|
||||||
iterator = TraversalEngine.applyDecoratingIterators(false,
|
iterator = TraversalEngine.applyDecoratingIterators(false,
|
||||||
iterator,
|
iterator,
|
||||||
reads.getDownsamplingFraction(),
|
reads.getDownsamplingFraction(),
|
||||||
reads.getMaxOnTheFlySorts(),
|
reads.getMaxOnTheFlySorts(),
|
||||||
reads.getSafetyChecking());
|
reads.getSafetyChecking());
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new StingException("seek: Unknown shard type");
|
throw new StingException("seek: Unknown shard type");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,9 +38,7 @@ import java.util.ArrayList;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/** Shards and schedules data in manageable chunks. */
|
||||||
* Shards and schedules data in manageable chunks.
|
|
||||||
*/
|
|
||||||
public abstract class MicroScheduler {
|
public abstract class MicroScheduler {
|
||||||
private static long SHARD_SIZE = 100000L;
|
private static long SHARD_SIZE = 100000L;
|
||||||
|
|
||||||
|
|
@ -59,14 +57,13 @@ public abstract class MicroScheduler {
|
||||||
* @param nThreadsToUse Number of threads to utilize.
|
* @param nThreadsToUse Number of threads to utilize.
|
||||||
* @return The best-fit microscheduler.
|
* @return The best-fit microscheduler.
|
||||||
*/
|
*/
|
||||||
public static MicroScheduler create( Walker walker, Reads reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
|
public static MicroScheduler create(Walker walker, Reads reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse) {
|
||||||
if( walker instanceof TreeReducible && nThreadsToUse > 1 ) {
|
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
|
||||||
logger.info("Creating hierarchical microscheduler");
|
logger.info("Creating hierarchical microscheduler");
|
||||||
return new HierarchicalMicroScheduler( walker, reads, ref, rods, nThreadsToUse );
|
return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
logger.info("Creating linear microscheduler");
|
logger.info("Creating linear microscheduler");
|
||||||
return new LinearMicroScheduler( walker, reads, ref, rods );
|
return new LinearMicroScheduler(walker, reads, ref, rods);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -75,16 +72,16 @@ public abstract class MicroScheduler {
|
||||||
* @param reads The reads.
|
* @param reads The reads.
|
||||||
* @param refFile File pointer to the reference.
|
* @param refFile File pointer to the reference.
|
||||||
*/
|
*/
|
||||||
protected MicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods ) {
|
protected MicroScheduler(Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
if (walker instanceof ReadWalker) {
|
if (walker instanceof ReadWalker) {
|
||||||
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
||||||
} else {
|
} else {
|
||||||
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
|
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.reads = getReadsDataSource( reads );
|
this.reads = getReadsDataSource(reads);
|
||||||
this.reference = openReferenceSequenceFile( refFile );
|
this.reference = openReferenceSequenceFile(refFile);
|
||||||
this.rods = getReferenceOrderedDataSources( rods );
|
this.rods = getReferenceOrderedDataSources(rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -102,7 +99,7 @@ public abstract class MicroScheduler {
|
||||||
* @param intervals A list of intervals over which to walk. Null for whole dataset.
|
* @param intervals A list of intervals over which to walk. Null for whole dataset.
|
||||||
* @return the return type of the walker
|
* @return the return type of the walker
|
||||||
*/
|
*/
|
||||||
public abstract Object execute( Walker walker, GenomeLocSortedSet intervals);
|
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the sharding strategy given a driving data source.
|
* Get the sharding strategy given a driving data source.
|
||||||
|
|
@ -111,32 +108,39 @@ public abstract class MicroScheduler {
|
||||||
* @param intervals Intervals to use when limiting sharding.
|
* @param intervals Intervals to use when limiting sharding.
|
||||||
* @return Sharding strategy for this driving data source.
|
* @return Sharding strategy for this driving data source.
|
||||||
*/
|
*/
|
||||||
protected ShardStrategy getShardStrategy( Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals ) {
|
protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
|
||||||
ShardStrategy shardStrategy = null;
|
ShardStrategy shardStrategy = null;
|
||||||
|
ShardStrategyFactory.SHATTER_STRATEGY shardType;
|
||||||
|
if (walker instanceof LocusWalker) {
|
||||||
|
if (intervals != null) {
|
||||||
|
shardType = (walker.isReduceByInterval()) ?
|
||||||
|
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
|
||||||
|
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
|
||||||
|
|
||||||
if( walker instanceof LocusWalker ) {
|
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||||
if( intervals != null ) {
|
drivingDataSource.getSequenceDictionary(),
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY shardType = (walker.isReduceByInterval()) ?
|
SHARD_SIZE,
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
|
intervals);
|
||||||
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
|
} else
|
||||||
|
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
||||||
|
drivingDataSource.getSequenceDictionary(),
|
||||||
|
SHARD_SIZE);
|
||||||
|
|
||||||
shardStrategy = ShardStrategyFactory.shatter( shardType,
|
} else if (walker instanceof ReadWalker) {
|
||||||
drivingDataSource.getSequenceDictionary(),
|
|
||||||
SHARD_SIZE,
|
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
|
||||||
intervals );
|
|
||||||
|
if (intervals != null) {
|
||||||
|
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||||
|
drivingDataSource.getSequenceDictionary(),
|
||||||
|
SHARD_SIZE,
|
||||||
|
intervals);
|
||||||
|
} else {
|
||||||
|
shardStrategy = ShardStrategyFactory.shatter(shardType,
|
||||||
|
drivingDataSource.getSequenceDictionary(),
|
||||||
|
SHARD_SIZE);
|
||||||
}
|
}
|
||||||
else
|
} else
|
||||||
shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
|
|
||||||
drivingDataSource.getSequenceDictionary(),
|
|
||||||
SHARD_SIZE );
|
|
||||||
|
|
||||||
}
|
|
||||||
else if( walker instanceof ReadWalker ) {
|
|
||||||
shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
|
||||||
drivingDataSource.getSequenceDictionary(),
|
|
||||||
SHARD_SIZE );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
|
throw new StingException("Unable to support walker of type" + walker.getClass().getName());
|
||||||
|
|
||||||
return shardStrategy;
|
return shardStrategy;
|
||||||
|
|
@ -147,20 +151,20 @@ public abstract class MicroScheduler {
|
||||||
* @param shard The section of data to view.
|
* @param shard The section of data to view.
|
||||||
* @return An accessor for all the data in this shard.
|
* @return An accessor for all the data in this shard.
|
||||||
*/
|
*/
|
||||||
protected ShardDataProvider getShardDataProvider( Shard shard ) {
|
protected ShardDataProvider getShardDataProvider(Shard shard) {
|
||||||
return new ShardDataProvider( shard, reads, reference, rods );
|
return new ShardDataProvider(shard, reads, reference, rods);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a data source for the given set of reads.
|
* Gets a data source for the given set of reads.
|
||||||
* @return A data source for the given set of reads.
|
* @return A data source for the given set of reads.
|
||||||
*/
|
*/
|
||||||
private SAMDataSource getReadsDataSource( Reads reads ) {
|
private SAMDataSource getReadsDataSource(Reads reads) {
|
||||||
// By reference traversals are happy with no reads. Make sure that case is handled.
|
// By reference traversals are happy with no reads. Make sure that case is handled.
|
||||||
if( reads.getReadsFiles().size() == 0 )
|
if (reads.getReadsFiles().size() == 0)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource( reads );
|
SAMDataSource dataSource = new SAMDataSource(reads);
|
||||||
|
|
||||||
// Side effect: initialize the traversal engine with reads data.
|
// Side effect: initialize the traversal engine with reads data.
|
||||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||||
|
|
@ -174,10 +178,10 @@ public abstract class MicroScheduler {
|
||||||
* Open the reference-ordered data sources.
|
* Open the reference-ordered data sources.
|
||||||
* @return A list of reference-ordered data sources.
|
* @return A list of reference-ordered data sources.
|
||||||
*/
|
*/
|
||||||
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources( List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
private List<ReferenceOrderedDataSource> getReferenceOrderedDataSources(List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||||
for( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod: rods )
|
for (ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods)
|
||||||
dataSources.add( new ReferenceOrderedDataSource(rod) );
|
dataSources.add(new ReferenceOrderedDataSource(rod));
|
||||||
return dataSources;
|
return dataSources;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -186,12 +190,12 @@ public abstract class MicroScheduler {
|
||||||
* @param refFile Handle to a reference sequence file. Non-null.
|
* @param refFile Handle to a reference sequence file. Non-null.
|
||||||
* @return A thread-safe file wrapper.
|
* @return A thread-safe file wrapper.
|
||||||
*/
|
*/
|
||||||
private IndexedFastaSequenceFile openReferenceSequenceFile( File refFile ) {
|
private IndexedFastaSequenceFile openReferenceSequenceFile(File refFile) {
|
||||||
IndexedFastaSequenceFile ref = null;
|
IndexedFastaSequenceFile ref = null;
|
||||||
try {
|
try {
|
||||||
ref = new IndexedFastaSequenceFile(refFile);
|
ref = new IndexedFastaSequenceFile(refFile);
|
||||||
}
|
}
|
||||||
catch( FileNotFoundException ex ) {
|
catch (FileNotFoundException ex) {
|
||||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||||
}
|
}
|
||||||
GenomeLoc.setupRefContigOrdering(ref);
|
GenomeLoc.setupRefContigOrdering(ref);
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.dataSources.providers.ReadView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView;
|
import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
|
||||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||||
|
import org.broadinstitute.sting.gatk.dataSources.shards.IntervalShard;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
|
@ -79,7 +80,12 @@ public class TraverseReads extends TraversalEngine {
|
||||||
ShardDataProvider dataProvider,
|
ShardDataProvider dataProvider,
|
||||||
T sum) {
|
T sum) {
|
||||||
|
|
||||||
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard) shard).getSize()));
|
if (shard instanceof ReadShard) {
|
||||||
|
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard) shard).getSize()));
|
||||||
|
} else if (shard instanceof IntervalShard) {
|
||||||
|
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((IntervalShard) shard).getGenomeLoc()));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!(walker instanceof ReadWalker))
|
if (!(walker instanceof ReadWalker))
|
||||||
throw new IllegalArgumentException("Walker isn't a read walker!");
|
throw new IllegalArgumentException("Walker isn't a read walker!");
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
* <p/>
|
* <p/>
|
||||||
* Tests the ReadIntervalShardStrategy class
|
* Tests the IntervalShardStrategy class
|
||||||
*/
|
*/
|
||||||
public class ReadIntervalShardStrategyTest extends BaseTest {
|
public class IntervalShardStrategyTest extends BaseTest {
|
||||||
|
|
||||||
private GenomeLocSortedSet mSortedSet = null;
|
private GenomeLocSortedSet mSortedSet = null;
|
||||||
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||||
|
|
@ -60,19 +60,21 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
|
||||||
|
|
||||||
@Test(expected = StingException.class)
|
@Test(expected = StingException.class)
|
||||||
public void testExceptionOnEmpty() {
|
public void testExceptionOnEmpty() {
|
||||||
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
|
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSingleChromosomeFunctionality() {
|
public void testSingleChromosomeFunctionality() {
|
||||||
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
|
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
|
||||||
mSortedSet.add(loc);
|
mSortedSet.add(loc);
|
||||||
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
|
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
|
Shard d = null;
|
||||||
while (strat.hasNext()) {
|
while (strat.hasNext()) {
|
||||||
Shard d = strat.next();
|
d = strat.next();
|
||||||
counter++;
|
counter++;
|
||||||
}
|
}
|
||||||
|
assertTrue(d instanceof IntervalShard);
|
||||||
assertEquals(10, counter);
|
assertEquals(10, counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,12 +84,14 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
|
||||||
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
||||||
mSortedSet.add(loc);
|
mSortedSet.add(loc);
|
||||||
}
|
}
|
||||||
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
|
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
|
Shard d = null;
|
||||||
while (strat.hasNext()) {
|
while (strat.hasNext()) {
|
||||||
Shard d = strat.next();
|
d = strat.next();
|
||||||
counter++;
|
counter++;
|
||||||
}
|
}
|
||||||
|
assertTrue(d instanceof IntervalShard);
|
||||||
assertEquals(50, counter);
|
assertEquals(50, counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -97,7 +101,7 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
|
||||||
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
||||||
mSortedSet.add(loc);
|
mSortedSet.add(loc);
|
||||||
}
|
}
|
||||||
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 789, mSortedSet);
|
IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet);
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while (strat.hasNext()) {
|
while (strat.hasNext()) {
|
||||||
Shard d = strat.next();
|
Shard d = strat.next();
|
||||||
|
|
@ -113,11 +117,28 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
|
||||||
assertEquals(10, counter);
|
assertEquals(10, counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInfiniteShardSize() {
|
||||||
|
for (int x = 0; x < 5; x++) {
|
||||||
|
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
||||||
|
mSortedSet.add(loc);
|
||||||
|
}
|
||||||
|
IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet);
|
||||||
|
int counter = 0;
|
||||||
|
while (strat.hasNext()) {
|
||||||
|
Shard d = strat.next();
|
||||||
|
assertEquals(1000, d.getGenomeLoc().getStop());
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
assertEquals(5, counter);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(expected = UnsupportedOperationException.class)
|
@Test(expected = UnsupportedOperationException.class)
|
||||||
public void testRemove() {
|
public void testRemove() {
|
||||||
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
|
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
|
||||||
mSortedSet.add(loc);
|
mSortedSet.add(loc);
|
||||||
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
|
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||||
strat.remove();
|
strat.remove();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -42,9 +41,9 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
* <p/>
|
* <p/>
|
||||||
* Tests for the IntervalReadShard class.
|
* Tests for the IntervalShard class.
|
||||||
*/
|
*/
|
||||||
public class IntervalReadShardTest extends BaseTest {
|
public class IntervalShardTest extends BaseTest {
|
||||||
|
|
||||||
private IntervalReadShard shard = null;
|
private IntervalShard intervalShard = null;
|
||||||
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||||
private static final int STARTING_CHROMOSOME = 1;
|
private static final int STARTING_CHROMOSOME = 1;
|
||||||
|
|
@ -59,15 +58,15 @@ public class IntervalReadShardTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void simpleReturn() {
|
public void simpleReturn() {
|
||||||
GenomeLoc loc = new GenomeLoc(1, 1, 100);
|
GenomeLoc loc = new GenomeLoc(1, 1, 100);
|
||||||
shard = new IntervalReadShard(loc);
|
intervalShard = new IntervalShard(loc);
|
||||||
assertTrue(shard.getGenomeLoc().equals(loc));
|
assertTrue(intervalShard.getGenomeLoc().equals(loc));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void ensureNotReference() {
|
public void ensureNotReference() {
|
||||||
GenomeLoc loc = new GenomeLoc(1, 1, 100);
|
GenomeLoc loc = new GenomeLoc(1, 1, 100);
|
||||||
shard = new IntervalReadShard(loc);
|
intervalShard = new IntervalShard(loc);
|
||||||
assertTrue(shard.getGenomeLoc() != loc && shard.getGenomeLoc().equals(loc));
|
assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -8,6 +8,7 @@ import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -38,42 +39,65 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class LocusIntervalShardStrategyTest
|
* Class LinearLocusShardStrategyTest
|
||||||
* <p/>
|
* <p/>
|
||||||
* Tests the LocusIntervalShardStrategy class.
|
* Tests for the LinearLocusShardStrategy class.
|
||||||
*/
|
*/
|
||||||
public class LocusIntervalShardStrategyTest extends BaseTest {
|
public class LinearLocusShardStrategyTest extends BaseTest {
|
||||||
|
|
||||||
private GenomeLocSortedSet mSortedSet = null;
|
private GenomeLocSortedSet mSortedSet = null;
|
||||||
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
|
||||||
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
private static final int NUMBER_OF_CHROMOSOMES = 5;
|
||||||
private static final int STARTING_CHROMOSOME = 1;
|
private static final int STARTING_CHROMOSOME = 1;
|
||||||
private static final int CHROMOSOME_SIZE = 1000;
|
private static final int CHROMOSOME_SIZE = 1000;
|
||||||
private LocusIntervalShardStrategy strat = null;
|
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() {
|
public void setup() {
|
||||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||||
mSortedSet = new GenomeLocSortedSet();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testOneToOneness() {
|
public void testSetup() {
|
||||||
for (int x = 0; x < 100; x++) {
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
|
||||||
GenomeLoc loc = new GenomeLoc(0,(x*10)+1, (x*10)+8);
|
|
||||||
mSortedSet.add(loc);
|
|
||||||
}
|
|
||||||
strat = new LocusIntervalShardStrategy(header.getSequenceDictionary(),mSortedSet);
|
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while (strat.hasNext()) {
|
while(strat.hasNext()) {
|
||||||
|
Shard d = strat.next();
|
||||||
|
assertTrue(d instanceof LocusShard);
|
||||||
|
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 499);
|
||||||
++counter;
|
++counter;
|
||||||
GenomeLoc loc = strat.next().getGenomeLoc();
|
|
||||||
long stop = loc.getStop();
|
|
||||||
long start = loc.getStart();
|
|
||||||
long length = stop - start;
|
|
||||||
assertTrue(length == 7);
|
|
||||||
}
|
}
|
||||||
assertTrue(counter == 100);
|
assertTrue(counter == 10);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAdjustSize() {
|
||||||
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
|
||||||
|
strat.adjustNextShardSize(1000);
|
||||||
|
int counter = 0;
|
||||||
|
while(strat.hasNext()) {
|
||||||
|
Shard d = strat.next();
|
||||||
|
assertTrue(d instanceof LocusShard);
|
||||||
|
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 999);
|
||||||
|
++counter;
|
||||||
|
}
|
||||||
|
assertTrue(counter == 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnevenSplit() {
|
||||||
|
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600);
|
||||||
|
int counter = 0;
|
||||||
|
while(strat.hasNext()) {
|
||||||
|
Shard d = strat.next();
|
||||||
|
assertTrue(d instanceof LocusShard);
|
||||||
|
if (counter % 2 == 0) {
|
||||||
|
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 599);
|
||||||
|
} else {
|
||||||
|
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 399);
|
||||||
|
}
|
||||||
|
++counter;
|
||||||
|
}
|
||||||
|
assertTrue(counter == 10);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2,20 +2,14 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
|
||||||
|
|
||||||
import static junit.framework.Assert.assertEquals;
|
import static junit.framework.Assert.assertEquals;
|
||||||
import static junit.framework.Assert.fail;
|
import static junit.framework.Assert.fail;
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
|
||||||
import org.junit.*;
|
import org.junit.*;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* User: aaron
|
* User: aaron
|
||||||
|
|
@ -63,7 +57,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
|
||||||
GenomeLoc l = new GenomeLoc(0,1,100);
|
GenomeLoc l = new GenomeLoc(0,1,100);
|
||||||
set.add(l);
|
set.add(l);
|
||||||
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
|
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
|
||||||
assertTrue(st instanceof ReadIntervalShardStrategy);
|
assertTrue(st instanceof IntervalShardStrategy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue