Moving some of the data sharding around. A new shard category now exists, INTERVAL. This saved a lot of code that was mirroring the same approach in both the read and locus shard strategies.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@840 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-27 18:24:31 +00:00
parent 99524ab6d0
commit 3c3cd5bb64
16 changed files with 217 additions and 325 deletions

View File

@ -6,20 +6,29 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.List; import java.util.List;
/** /*
* Copyright (c) 2009 The Broad Institute
* *
* User: aaron * Permission is hereby granted, free of charge, to any person
* Date: Apr 6, 2009 * obtaining a copy of this software and associated documentation
* Time: 8:23:19 PM * files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* *
* The Broad Institute * The above copyright notice and this permission notice shall be
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * included in all copies or substantial portions of the Software.
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/ */

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.dataSources.shards; package org.broadinstitute.sting.gatk.dataSources.shards;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -32,16 +31,17 @@ import org.broadinstitute.sting.utils.GenomeLoc;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
* Class IntervalReadShard * Class IntervalShard
* <p/> * <p/>
* This is the read shard that knowns about genomic intervals * the base interval shard. All interval shards are generally the same,
* but must return their ShardType individually.
*/ */
public class IntervalReadShard implements Shard { public class IntervalShard implements Shard {
/** a collection of genomic locations to interate over */ /** a collection of genomic locations to interate over */
private GenomeLoc mSet; private GenomeLoc mSet;
IntervalReadShard(GenomeLoc myLocation) { IntervalShard(GenomeLoc myLocation) {
mSet = myLocation.clone(); mSet = myLocation.clone();
} }
@ -55,7 +55,7 @@ public class IntervalReadShard implements Shard {
* *
* @return READ, indicating the shard type * @return READ, indicating the shard type
*/ */
public ShardType getShardType() { public Shard.ShardType getShardType() {
return Shard.ShardType.READ; return ShardType.INTERVAL;
} }
} }

View File

@ -6,20 +6,29 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.List; import java.util.List;
/** /*
* Copyright (c) 2009 The Broad Institute
* *
* User: aaron * Permission is hereby granted, free of charge, to any person
* Date: Apr 6, 2009 * obtaining a copy of this software and associated documentation
* Time: 7:18:19 PM * files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* *
* The Broad Institute * The above copyright notice and this permission notice shall be
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * included in all copies or substantial portions of the Software.
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/ */

View File

@ -1,66 +0,0 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.List;
/**
*
* User: aaron
* Date: May 14, 2009
* Time: 3:28:50 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
 * @author aaron
 * @version 1.0
 * @date May 14, 2009
 * <p/>
 * Class LocusIntervalShardStrategy
 * <p/>
 * A locus shard strategy whose next shard size is taken from the current
 * interval rather than from a configured value. The stated intent is a
 * one-to-one mapping between each GenomeLoc and a shard; that guarantee
 * depends on how the base class consumes nextShardSize() (not visible here).
 */
public class LocusIntervalShardStrategy extends LocusShardStrategy {

    /**
     * Creates the strategy by passing both arguments straight to the base class.
     *
     * @param dic       the sequence dictionary
     * @param intervals the genomic intervals to shard over
     */
    LocusIntervalShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
        super(dic, intervals);
    }

    /**
     * Supplies the size of the next shard: the current interval's stop minus its start.
     * NOTE(review): if GenomeLoc coordinates are inclusive this is one less than the
     * number of bases in the interval -- confirm against LocusShardStrategy.
     *
     * @return the stop/start distance of the current interval
     */
    protected long nextShardSize() {
        return this.getCurrentInterval().getStop() - this.getCurrentInterval().getStart();
    }

    /**
     * Accepts a size hint and ignores it; this strategy derives every shard size
     * from the interval itself.
     *
     * @param size the requested next shard size (unused)
     */
    public void adjustNextShardSize(long size) {
        // deliberately a no-op: shard sizes come from the intervals, not from the caller
    }
}

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
/** /**
* *
* User: aaron * User: aaron
@ -159,11 +158,11 @@ public abstract class LocusShardStrategy implements ShardStrategy {
if (loc.getStop() - loc.getStart() <= proposedSize) { if (loc.getStop() - loc.getStart() <= proposedSize) {
intervals.removeRegion(loc); intervals.removeRegion(loc);
return new IntervalReadShard(loc); return new IntervalShard(loc);
} else { } else {
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1); GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
intervals.removeRegion(subLoc); intervals.removeRegion(subLoc);
return new IntervalReadShard(subLoc); return new IntervalShard(subLoc);
} }
} }

View File

@ -1,118 +0,0 @@
package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import java.util.Iterator;
import java.util.List;
/**
*
* User: aaron
* Date: May 21, 2009
* Time: 4:13:53 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
 * @author aaron
 * <p/>
 * Class ReadIntervalShardStrategy
 * <p/>
 * Implements the sharding strategy for reads, given a set of genomic
 * locations. Every shard handed out lies inside one of the supplied
 * intervals; an interval wider than the current shard size is carved
 * into several consecutive shards.
 */
public class ReadIntervalShardStrategy implements ShardStrategy {

    /** the sequence dictionary supplied at construction; only stored by this class */
    private final SAMSequenceDictionary dict;

    /** private copy of the regions still waiting to be turned into shards */
    private final GenomeLocSortedSet regions;

    /** the currently preferred shard size; callers may change it between shards */
    private long size;

    /**
     * Creates a strategy over a copy of the given locations.
     *
     * @param dict      the sequence dictionary to use
     * @param size      the preferred shard size, compared against each interval's stop - start
     * @param locations the genomic regions to shard; must be non-null and non-empty
     * @throws StingException if locations is null or empty
     */
    ReadIntervalShardStrategy(SAMSequenceDictionary dict, long size, GenomeLocSortedSet locations) {
        if (locations == null || locations.isEmpty()) {
            throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty.");
        }
        this.dict = dict;
        this.size = size;
        // work on a clone so removing regions never touches the caller's set
        this.regions = locations.clone();
    }

    /**
     * Changes the recommended size for the next shard generated. The value is
     * honored on a best-effort basis only.
     *
     * @param size the next recommended shard size.
     */
    public void adjustNextShardSize(long size) {
        this.size = size;
    }

    /**
     * Reports whether any region remains to be sharded.
     *
     * @return true while regions remain, false once all have been consumed
     */
    public boolean hasNext() {
        return !regions.isEmpty();
    }

    /**
     * Produces the next shard from the first remaining region and removes the
     * covered span from the pending set.
     * NOTE(review): the comparison uses stop - start, so an interval exactly one
     * position wider than the size is still returned whole -- confirm this is intended.
     *
     * @return the next shard
     * @throws StingException if no regions remain
     */
    public Shard next() {
        if (regions == null || regions.isEmpty()) {
            throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty in next() function.");
        }

        // always work from the first region in the set
        final GenomeLoc head = regions.iterator().next();
        final boolean fitsInOneShard = head.getStop() - head.getStart() <= this.size;

        // either the whole region, or its leading slice of the preferred size
        final GenomeLoc shardLoc = fitsInOneShard
                ? head
                : new GenomeLoc(head.getContigIndex(), head.getStart(), head.getStart() + size - 1);

        regions.removeRegion(shardLoc);
        return new IntervalReadShard(shardLoc);
    }

    /**
     * Removal is not supported by shard strategies.
     */
    public void remove() {
        throw new UnsupportedOperationException("ShardStrategies don't support remove()");
    }

    /**
     * Lets the strategy be used directly in a for-each loop.
     *
     * @return this strategy, acting as its own iterator
     */
    public Iterator<Shard> iterator() {
        return this;
    }
}

View File

@ -4,16 +4,29 @@ import net.sf.samtools.SAMSequenceDictionary;
import java.util.Iterator; import java.util.Iterator;
/** /*
* Copyright (c) 2009 The Broad Institute
* *
* The Broad Institute * Permission is hereby granted, free of charge, to any person
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * obtaining a copy of this software and associated documentation
* This software and its documentation are copyright 2009 by the * files (the "Software"), to deal in the Software without
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* *
* This software is supplied without any warranty or guaranteed support whatsoever. Neither * The above copyright notice and this permission notice shall be
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. * included in all copies or substantial portions of the Software.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/ */

View File

@ -30,7 +30,7 @@ import java.io.Serializable;
*/ */
public interface Shard extends Serializable { public interface Shard extends Serializable {
enum ShardType { enum ShardType {
READ, LOCUS READ, LOCUS, INTERVAL
} }
/** @return the genome location represented by this shard */ /** @return the genome location represented by this shard */

View File

@ -2,12 +2,9 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.List;
/** /**
* *
* User: aaron * User: aaron
@ -81,10 +78,9 @@ public class ShardStrategyFactory {
return new LinearLocusShardStrategy(dic, startingSize, lst); return new LinearLocusShardStrategy(dic, startingSize, lst);
case EXPONENTIAL: case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst); return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
case READS:
return new ReadIntervalShardStrategy(dic, startingSize, lst);
case INTERVAL: case INTERVAL:
return new LocusIntervalShardStrategy(dic, lst); case READS:
return new IntervalShardStrategy(startingSize, lst);
default: default:
throw new StingException("Strategy: " + strat + " isn't implemented"); throw new StingException("Strategy: " + strat + " isn't implemented");
} }

View File

@ -148,14 +148,16 @@ public class SAMDataSource implements SimpleDataSource {
reads.getDownsamplingFraction(), reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(), reads.getMaxOnTheFlySorts(),
reads.getSafetyChecking()); reads.getSafetyChecking());
} else if (shard.getShardType() == Shard.ShardType.LOCUS) { } else if (shard.getShardType() == Shard.ShardType.LOCUS ||
shard.getShardType() == Shard.ShardType.INTERVAL) {
iterator = seekLocus(shard.getGenomeLoc()); iterator = seekLocus(shard.getGenomeLoc());
iterator = TraversalEngine.applyDecoratingIterators(false, iterator = TraversalEngine.applyDecoratingIterators(false,
iterator, iterator,
reads.getDownsamplingFraction(), reads.getDownsamplingFraction(),
reads.getMaxOnTheFlySorts(), reads.getMaxOnTheFlySorts(),
reads.getSafetyChecking()); reads.getSafetyChecking());
} else { }
else {
throw new StingException("seek: Unknown shard type"); throw new StingException("seek: Unknown shard type");
} }

View File

@ -38,9 +38,7 @@ import java.util.ArrayList;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
/** /** Shards and schedules data in manageable chunks. */
* Shards and schedules data in manageable chunks.
*/
public abstract class MicroScheduler { public abstract class MicroScheduler {
private static long SHARD_SIZE = 100000L; private static long SHARD_SIZE = 100000L;
@ -63,8 +61,7 @@ public abstract class MicroScheduler {
if (walker instanceof TreeReducible && nThreadsToUse > 1) { if (walker instanceof TreeReducible && nThreadsToUse > 1) {
logger.info("Creating hierarchical microscheduler"); logger.info("Creating hierarchical microscheduler");
return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse); return new HierarchicalMicroScheduler(walker, reads, ref, rods, nThreadsToUse);
} } else {
else {
logger.info("Creating linear microscheduler"); logger.info("Creating linear microscheduler");
return new LinearMicroScheduler(walker, reads, ref, rods); return new LinearMicroScheduler(walker, reads, ref, rods);
} }
@ -113,10 +110,10 @@ public abstract class MicroScheduler {
*/ */
protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) { protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
ShardStrategy shardStrategy = null; ShardStrategy shardStrategy = null;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
if (walker instanceof LocusWalker) { if (walker instanceof LocusWalker) {
if (intervals != null) { if (intervals != null) {
ShardStrategyFactory.SHATTER_STRATEGY shardType = (walker.isReduceByInterval()) ? shardType = (walker.isReduceByInterval()) ?
ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL : ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL :
ShardStrategyFactory.SHATTER_STRATEGY.LINEAR; ShardStrategyFactory.SHATTER_STRATEGY.LINEAR;
@ -124,19 +121,26 @@ public abstract class MicroScheduler {
drivingDataSource.getSequenceDictionary(), drivingDataSource.getSequenceDictionary(),
SHARD_SIZE, SHARD_SIZE,
intervals); intervals);
} } else
else
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
drivingDataSource.getSequenceDictionary(), drivingDataSource.getSequenceDictionary(),
SHARD_SIZE); SHARD_SIZE);
} } else if (walker instanceof ReadWalker) {
else if( walker instanceof ReadWalker ) {
shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.READS, shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS;
if (intervals != null) {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
intervals);
} else {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(), drivingDataSource.getSequenceDictionary(),
SHARD_SIZE); SHARD_SIZE);
} }
else } else
throw new StingException("Unable to support walker of type" + walker.getClass().getName()); throw new StingException("Unable to support walker of type" + walker.getClass().getName());
return shardStrategy; return shardStrategy;

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.dataSources.providers.ReadView;
import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView; import org.broadinstitute.sting.gatk.dataSources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard; import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.shards.IntervalShard;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -79,7 +80,12 @@ public class TraverseReads extends TraversalEngine {
ShardDataProvider dataProvider, ShardDataProvider dataProvider,
T sum) { T sum) {
if (shard instanceof ReadShard) {
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard) shard).getSize())); logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((ReadShard) shard).getSize()));
} else if (shard instanceof IntervalShard) {
logger.debug(String.format("TraverseReads.traverse Genomic interval is %s", ((IntervalShard) shard).getGenomeLoc()));
}
if (!(walker instanceof ReadWalker)) if (!(walker instanceof ReadWalker))
throw new IllegalArgumentException("Walker isn't a read walker!"); throw new IllegalArgumentException("Walker isn't a read walker!");

View File

@ -44,7 +44,7 @@ import net.sf.samtools.SAMFileHeader;
* <p/> * <p/>
* Tests the ReadIntervalShardStrategy class * Tests the ReadIntervalShardStrategy class
*/ */
public class ReadIntervalShardStrategyTest extends BaseTest { public class IntervalShardStrategyTest extends BaseTest {
private GenomeLocSortedSet mSortedSet = null; private GenomeLocSortedSet mSortedSet = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
@ -60,19 +60,21 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
@Test(expected = StingException.class) @Test(expected = StingException.class)
public void testExceptionOnEmpty() { public void testExceptionOnEmpty() {
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet); IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
} }
@Test @Test
public void testSingleChromosomeFunctionality() { public void testSingleChromosomeFunctionality() {
GenomeLoc loc = new GenomeLoc(1, 1, 1000); GenomeLoc loc = new GenomeLoc(1, 1, 1000);
mSortedSet.add(loc); mSortedSet.add(loc);
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet); IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
int counter = 0; int counter = 0;
Shard d = null;
while (strat.hasNext()) { while (strat.hasNext()) {
Shard d = strat.next(); d = strat.next();
counter++; counter++;
} }
assertTrue(d instanceof IntervalShard);
assertEquals(10, counter); assertEquals(10, counter);
} }
@ -82,12 +84,14 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
GenomeLoc loc = new GenomeLoc(x, 1, 1000); GenomeLoc loc = new GenomeLoc(x, 1, 1000);
mSortedSet.add(loc); mSortedSet.add(loc);
} }
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet); IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
int counter = 0; int counter = 0;
Shard d = null;
while (strat.hasNext()) { while (strat.hasNext()) {
Shard d = strat.next(); d = strat.next();
counter++; counter++;
} }
assertTrue(d instanceof IntervalShard);
assertEquals(50, counter); assertEquals(50, counter);
} }
@ -97,7 +101,7 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
GenomeLoc loc = new GenomeLoc(x, 1, 1000); GenomeLoc loc = new GenomeLoc(x, 1, 1000);
mSortedSet.add(loc); mSortedSet.add(loc);
} }
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 789, mSortedSet); IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet);
int counter = 0; int counter = 0;
while (strat.hasNext()) { while (strat.hasNext()) {
Shard d = strat.next(); Shard d = strat.next();
@ -113,11 +117,28 @@ public class ReadIntervalShardStrategyTest extends BaseTest {
assertEquals(10, counter); assertEquals(10, counter);
} }
@Test
public void testInfiniteShardSize() {
for (int x = 0; x < 5; x++) {
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
mSortedSet.add(loc);
}
IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet);
int counter = 0;
while (strat.hasNext()) {
Shard d = strat.next();
assertEquals(1000, d.getGenomeLoc().getStop());
counter++;
}
assertEquals(5, counter);
}
@Test(expected = UnsupportedOperationException.class) @Test(expected = UnsupportedOperationException.class)
public void testRemove() { public void testRemove() {
GenomeLoc loc = new GenomeLoc(1, 1, 1000); GenomeLoc loc = new GenomeLoc(1, 1, 1000);
mSortedSet.add(loc); mSortedSet.add(loc);
ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet); IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
strat.remove(); strat.remove();
} }

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils; import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -42,9 +41,9 @@ import net.sf.samtools.SAMFileHeader;
* <p/> * <p/>
* Tests for the IntervalReadShard class. * Tests for the IntervalReadShard class.
*/ */
public class IntervalReadShardTest extends BaseTest { public class IntervalShardTest extends BaseTest {
private IntervalReadShard shard = null; private IntervalShard intervalShard = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
private static final int NUMBER_OF_CHROMOSOMES = 5; private static final int NUMBER_OF_CHROMOSOMES = 5;
private static final int STARTING_CHROMOSOME = 1; private static final int STARTING_CHROMOSOME = 1;
@ -59,15 +58,15 @@ public class IntervalReadShardTest extends BaseTest {
@Test @Test
public void simpleReturn() { public void simpleReturn() {
GenomeLoc loc = new GenomeLoc(1, 1, 100); GenomeLoc loc = new GenomeLoc(1, 1, 100);
shard = new IntervalReadShard(loc); intervalShard = new IntervalShard(loc);
assertTrue(shard.getGenomeLoc().equals(loc)); assertTrue(intervalShard.getGenomeLoc().equals(loc));
} }
@Test @Test
public void ensureNotReference() { public void ensureNotReference() {
GenomeLoc loc = new GenomeLoc(1, 1, 100); GenomeLoc loc = new GenomeLoc(1, 1, 100);
shard = new IntervalReadShard(loc); intervalShard = new IntervalShard(loc);
assertTrue(shard.getGenomeLoc() != loc && shard.getGenomeLoc().equals(loc)); assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
} }
} }

View File

@ -8,6 +8,7 @@ import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
/* /*
@ -38,42 +39,65 @@ import net.sf.samtools.SAMFileHeader;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
* Class LocusIntervalShardStrategyTest * Class LocusShardStrategyTest
* <p/> * <p/>
* Tests the LocusIntervalShardStrategy class. * Test for the Locus Shard Strategy
*/ */
public class LocusIntervalShardStrategyTest extends BaseTest { public class LinearLocusShardStrategyTest extends BaseTest {
private GenomeLocSortedSet mSortedSet = null; private GenomeLocSortedSet mSortedSet = null;
private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
private static final int NUMBER_OF_CHROMOSOMES = 5; private static final int NUMBER_OF_CHROMOSOMES = 5;
private static final int STARTING_CHROMOSOME = 1; private static final int STARTING_CHROMOSOME = 1;
private static final int CHROMOSOME_SIZE = 1000; private static final int CHROMOSOME_SIZE = 1000;
private LocusIntervalShardStrategy strat = null;
@Before @Before
public void setup() { public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary()); GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
mSortedSet = new GenomeLocSortedSet();
} }
@Test @Test
public void testOneToOneness() { public void testSetup() {
for (int x = 0; x < 100; x++) { LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
GenomeLoc loc = new GenomeLoc(0,(x*10)+1, (x*10)+8);
mSortedSet.add(loc);
}
strat = new LocusIntervalShardStrategy(header.getSequenceDictionary(),mSortedSet);
int counter = 0; int counter = 0;
while(strat.hasNext()) { while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 499);
++counter; ++counter;
GenomeLoc loc = strat.next().getGenomeLoc();
long stop = loc.getStop();
long start = loc.getStart();
long length = stop - start;
assertTrue(length == 7);
} }
assertTrue(counter == 100); assertTrue(counter == 10);
} }
@Test
public void testAdjustSize() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
strat.adjustNextShardSize(1000);
int counter = 0;
while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 999);
++counter;
}
assertTrue(counter == 5);
}
@Test
public void testUnevenSplit() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600);
int counter = 0;
while(strat.hasNext()) {
Shard d = strat.next();
assertTrue(d instanceof LocusShard);
if (counter % 2 == 0) {
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 599);
} else {
assertTrue(d.getGenomeLoc().getStop() - d.getGenomeLoc().getStart() == 399);
}
++counter;
}
assertTrue(counter == 10);
}
} }

View File

@ -2,20 +2,14 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.fail; import static junit.framework.Assert.fail;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSamUtils; import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.junit.*; import org.junit.*;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.File;
import java.util.ArrayList;
/** /**
* *
* User: aaron * User: aaron
@ -63,7 +57,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
GenomeLoc l = new GenomeLoc(0,1,100); GenomeLoc l = new GenomeLoc(0,1,100);
set.add(l); set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set); ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
assertTrue(st instanceof ReadIntervalShardStrategy); assertTrue(st instanceof IntervalShardStrategy);
} }
@Test @Test