2011-01-14 11:14:24 +08:00
// our package
package org.broadinstitute.sting.utils.threading ;
// the imports for unit testing.
import net.sf.picard.reference.IndexedFastaSequenceFile ;
import org.broadinstitute.sting.BaseTest ;
2011-01-20 20:58:13 +08:00
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator ;
2011-01-14 11:14:24 +08:00
import org.broadinstitute.sting.utils.GenomeLoc ;
import org.broadinstitute.sting.utils.GenomeLocParser ;
import org.broadinstitute.sting.utils.exceptions.UserException ;
import org.testng.Assert ;
2011-01-18 05:23:09 +08:00
import org.testng.annotations.* ;
2011-01-14 11:14:24 +08:00
import java.io.File ;
import java.io.FileNotFoundException ;
import java.util.ArrayList ;
import java.util.Arrays ;
2011-01-20 20:58:13 +08:00
import java.util.Iterator ;
2011-01-14 11:14:24 +08:00
import java.util.List ;
2011-01-18 05:23:09 +08:00
import java.util.concurrent.* ;
2011-01-14 11:14:24 +08:00
/**
 * Basic unit tests for the GenomeLocProcessingTracker implementations
 */
public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
// Indexed reference FASTA; null until before() opens it.
IndexedFastaSequenceFile fasta = null ;
// Parser built from the reference in before(); used by the tests to create GenomeLocs.
GenomeLocParser genomeLocParser = null ;
// Name of the sequence at index 1 of the reference's sequence dictionary; set in before().
String chr1 = null ;
2011-01-20 20:58:13 +08:00
// Path prefix of the backing files that createData() hands to the file-backed trackers.
private final static String FILE_ROOT = "testdata/GLPTFile" ;
2011-01-14 11:14:24 +08:00
/**
 * Opens the reference named by hg18Reference and derives from it the
 * sequence name and GenomeLocParser that the tests use to build shards.
 *
 * @throws UserException.CouldNotReadInputFile if the reference file cannot be found
 */
@BeforeTest
public void before() {
    final File reference = new File(hg18Reference);
    try {
        final IndexedFastaSequenceFile opened = new IndexedFastaSequenceFile(reference);
        fasta = opened;
        // note: dictionary index 1, i.e. the second sequence of the reference
        chr1 = opened.getSequenceDictionary().getSequence(1).getSequenceName();
        genomeLocParser = new GenomeLocParser(opened);
    } catch (FileNotFoundException notFound) {
        throw new UserException.CouldNotReadInputFile(reference, notFound);
    }
}
2011-01-20 20:58:13 +08:00
/**
 * Runs before each test method. When the method received arguments, the
 * first one is taken to be its TestTarget and is initialized.
 *
 * @param data the arguments of the test method about to run
 */
@BeforeMethod
public void beforeMethod(Object[] data) {
    if (data.length == 0) {
        // test method takes no TestTarget; nothing to prepare
        return;
    }
    final TestTarget target = (TestTarget) data[0];
    target.init();
}
2011-01-14 11:14:24 +08:00
2011-01-20 20:58:13 +08:00
@AfterMethod
public void afterMethod ( Object [ ] data ) {
2011-01-26 21:45:40 +08:00
if ( data . length > 0 ) {
2011-01-20 20:58:13 +08:00
( ( TestTarget ) data [ 0 ] ) . getTracker ( ) . close ( ) ;
2011-01-26 21:45:40 +08:00
( ( TestTarget ) data [ 0 ] ) . cleanup ( ) ;
}
2011-01-18 05:23:09 +08:00
}
2011-01-14 11:14:24 +08:00
abstract private class TestTarget {
String name ;
int nShards ;
int shardSize ;
2011-01-26 21:45:40 +08:00
File file ;
2011-01-14 11:14:24 +08:00
2011-01-26 21:45:40 +08:00
public void init ( ) { cleanup ( ) ; }
public void cleanup ( ) {
if ( file ! = null & & file . exists ( ) )
file . delete ( ) ;
}
2011-01-20 20:58:13 +08:00
2011-01-26 21:45:40 +08:00
public boolean isThreadSafe ( ) { return true ; }
protected TestTarget ( String name , int nShards , int shardSize , File file ) {
2011-01-14 11:14:24 +08:00
this . name = name ;
this . nShards = nShards ;
this . shardSize = shardSize ;
2011-01-26 21:45:40 +08:00
this . file = file ;
2011-01-14 11:14:24 +08:00
}
public abstract GenomeLocProcessingTracker getTracker ( ) ;
public List < GenomeLoc > getShards ( ) {
List < GenomeLoc > shards = new ArrayList < GenomeLoc > ( ) ;
for ( int i = 0 ; i < nShards ; i + + ) {
int start = shardSize * i ;
int stop = start + shardSize ;
shards . add ( genomeLocParser . createGenomeLoc ( chr1 , start , stop ) ) ;
}
return shards ;
}
public String toString ( ) {
return String . format ( "TestTarget %s: nShards=%d shardSize=%d" , name , nShards , shardSize ) ;
}
}
2011-01-20 20:58:13 +08:00
@DataProvider ( name = "threadData" )
public Object [ ] [ ] createThreadData ( ) {
2011-01-27 21:48:38 +08:00
// gotta keep the tests small...
return createData ( Arrays . asList ( 10 , 100 ) , Arrays . asList ( 10 ) ) ;
//return createData(Arrays.asList(10, 100, 1000, 10000), Arrays.asList(10));
2011-01-18 05:23:09 +08:00
}
2011-01-20 20:58:13 +08:00
public Object [ ] [ ] createData ( List < Integer > nShards , List < Integer > shardSizes ) {
2011-01-14 11:14:24 +08:00
List < TestTarget > params = new ArrayList < TestTarget > ( ) ;
2011-01-20 20:58:13 +08:00
int counter = 0 ;
2011-01-26 21:45:40 +08:00
String name = null ;
2011-01-20 20:58:13 +08:00
for ( int nShard : nShards ) {
for ( int shardSize : shardSizes ) {
2011-01-14 11:14:24 +08:00
// shared mem -- canonical implementation
2011-01-26 21:45:40 +08:00
params . add ( new TestTarget ( "ThreadSafeSharedMemory" , nShard , shardSize , null ) {
GenomeLocProcessingTracker tracker = new SharedMemoryGenomeLocProcessingTracker ( new ClosableReentrantLock ( ) ) ;
2011-01-14 11:14:24 +08:00
public GenomeLocProcessingTracker getTracker ( ) { return tracker ; }
} ) ;
2011-01-18 05:23:09 +08:00
2011-01-20 20:58:13 +08:00
final File file1 = new File ( String . format ( "%s_ThreadSafeFileBacked_%d_%d" , FILE_ROOT , counter + + , nShard , shardSize ) ) ;
2011-01-26 21:45:40 +08:00
params . add ( new TestTarget ( "ThreadSafeFileBacked" , nShard , shardSize , file1 ) {
GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker ( file1 , genomeLocParser , new ClosableReentrantLock ( ) , null ) ;
2011-01-20 20:58:13 +08:00
public GenomeLocProcessingTracker getTracker ( ) { return tracker ; }
} ) ;
2011-01-26 21:45:40 +08:00
name = "FileBackedSharedFileThreadSafe" ;
final File file2 = new File ( String . format ( "%s_%s_%d_%d" , FILE_ROOT , name , counter + + , nShard , shardSize ) ) ;
params . add ( new TestTarget ( name , nShard , shardSize , file2 ) {
GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker ( file2 , genomeLocParser , new SharedFileThreadSafeLock ( file2 , - 1 ) , null ) ;
public GenomeLocProcessingTracker getTracker ( ) { return tracker ; }
} ) ;
name = "FileBackedSharedFile" ;
final File file3 = new File ( String . format ( "%s_%s_%d_%d" , FILE_ROOT , name , counter + + , nShard , shardSize ) ) ;
params . add ( new TestTarget ( name , nShard , shardSize , file3 ) {
GenomeLocProcessingTracker tracker = new FileBackedGenomeLocProcessingTracker ( file3 , genomeLocParser , new SharedFileLock ( file3 , - 1 ) , null ) ;
public GenomeLocProcessingTracker getTracker ( ) { return tracker ; }
public boolean isThreadSafe ( ) { return false ; }
} ) ;
2011-01-14 11:14:24 +08:00
}
}
List < Object [ ] > params2 = new ArrayList < Object [ ] > ( ) ;
for ( TestTarget x : params ) params2 . add ( new Object [ ] { x } ) ;
return params2 . toArray ( new Object [ ] [ ] { } ) ;
}
2011-01-20 20:58:13 +08:00
/**
 * Data provider for the single-threaded tests.
 *
 * @return one row per TestTarget built by createData for 1000 shards of size 100
 */
@DataProvider(name = "simpleData")
public Object[][] createSimpleData() {
    final List<Integer> shardCounts = Arrays.asList(1000);
    final List<Integer> shardSizes = Arrays.asList(100);
    return createData(shardCounts, shardSizes);
}
2011-01-14 11:14:24 +08:00
// Owner names the tests pass to the tracker when claiming or querying shards.
private static final String NAME_ONE = "name1" ;
private static final String NAME_TWO = "name2" ;
2011-01-20 20:58:13 +08:00
@Test ( enabled = true )
public void testNoop ( ) {
2011-01-26 21:45:40 +08:00
GenomeLocProcessingTracker tracker = new NoOpGenomeLocProcessingTracker ( ) ;
2011-01-20 20:58:13 +08:00
for ( int start = 1 ; start < 100 ; start + + ) {
for ( int n = 0 ; n < 2 ; n + + ) {
GenomeLoc loc = genomeLocParser . createGenomeLoc ( chr1 , start , start + 1 ) ;
ProcessingLoc ploc = tracker . claimOwnership ( loc , NAME_ONE ) ;
Assert . assertTrue ( ploc . isOwnedBy ( NAME_ONE ) ) ;
2011-01-28 11:40:09 +08:00
Assert . assertEquals ( tracker . updateAndGetProcessingLocs ( NAME_ONE ) . size ( ) , 0 ) ;
2011-01-20 20:58:13 +08:00
}
}
}
@Test ( dataProvider = "simpleData" , enabled = true )
2011-01-14 11:14:24 +08:00
public void testSingleProcessTracker ( TestTarget test ) {
GenomeLocProcessingTracker tracker = test . getTracker ( ) ;
List < GenomeLoc > shards = test . getShards ( ) ;
logger . warn ( "testSingleProcessTracker " + test ) ;
int counter = 0 ;
for ( GenomeLoc shard : shards ) {
counter + + ;
2011-01-25 00:45:07 +08:00
Assert . assertNull ( tracker . findOwner ( shard , NAME_ONE ) ) ;
Assert . assertFalse ( tracker . locIsOwned ( shard , NAME_ONE ) ) ;
2011-01-14 11:14:24 +08:00
2011-01-20 20:58:13 +08:00
ProcessingLoc proc = tracker . claimOwnership ( shard , NAME_ONE ) ;
2011-01-14 11:14:24 +08:00
Assert . assertNotNull ( proc ) ;
2011-01-20 20:58:13 +08:00
Assert . assertNotNull ( proc . getLocation ( ) ) ;
2011-01-14 11:14:24 +08:00
Assert . assertNotNull ( proc . getOwner ( ) ) ;
2011-01-20 20:58:13 +08:00
Assert . assertEquals ( proc . getLocation ( ) , shard ) ;
2011-01-14 11:14:24 +08:00
Assert . assertEquals ( proc . getOwner ( ) , NAME_ONE ) ;
2011-01-25 00:45:07 +08:00
Assert . assertEquals ( tracker . findOwner ( shard , NAME_ONE ) , proc ) ;
Assert . assertTrue ( tracker . locIsOwned ( shard , NAME_ONE ) ) ;
2011-01-28 11:40:09 +08:00
Assert . assertNotNull ( tracker . updateAndGetProcessingLocs ( NAME_ONE ) ) ;
Assert . assertEquals ( tracker . updateAndGetProcessingLocs ( NAME_ONE ) . size ( ) , counter ) ;
2011-01-14 11:14:24 +08:00
2011-01-20 20:58:13 +08:00
ProcessingLoc badClaimAttempt = tracker . claimOwnership ( shard , NAME_TWO ) ;
2011-01-14 11:14:24 +08:00
Assert . assertFalse ( badClaimAttempt . getOwner ( ) . equals ( NAME_TWO ) ) ;
Assert . assertEquals ( badClaimAttempt . getOwner ( ) , NAME_ONE ) ;
}
}
2011-01-20 20:58:13 +08:00
@Test ( dataProvider = "simpleData" , enabled = true )
public void testIterator ( TestTarget test ) {
GenomeLocProcessingTracker tracker = test . getTracker ( ) ;
List < GenomeLoc > shards = test . getShards ( ) ;
logger . warn ( "testIterator " + test ) ;
List < GenomeLoc > markedShards = new ArrayList < GenomeLoc > ( ) ;
List < GenomeLoc > toFind = new ArrayList < GenomeLoc > ( ) ;
for ( int i = 0 ; i < shards . size ( ) ; i + + ) {
if ( ! ( i % 10 = = 0 ) ) {
markedShards . add ( shards . get ( i ) ) ;
tracker . claimOwnership ( shards . get ( i ) , NAME_TWO ) ;
} else {
toFind . add ( shards . get ( i ) ) ;
}
}
int nFound = 0 ;
Iterator < GenomeLoc > it = shards . iterator ( ) ;
while ( it . hasNext ( ) ) {
GenomeLoc shard = tracker . claimOwnershipOfNextAvailable ( it , NAME_ONE ) ;
if ( shard = = null ) { // everything to get is done
Assert . assertEquals ( nFound , toFind . size ( ) , "Didn't find all of the available shards" ) ;
} else {
nFound + + ;
2011-01-25 00:45:07 +08:00
ProcessingLoc proc = tracker . findOwner ( shard , NAME_ONE ) ;
2011-01-20 20:58:13 +08:00
Assert . assertTrue ( proc . isOwnedBy ( NAME_ONE ) ) ;
Assert . assertTrue ( ! markedShards . contains ( shard ) , "Ran process was already marked!" ) ;
Assert . assertTrue ( toFind . contains ( shard ) , "Claimed shard wasn't one of the unmarked!" ) ;
}
}
}
@Test ( dataProvider = "simpleData" , enabled = true )
2011-01-14 11:14:24 +08:00
public void testMarkedProcesses ( TestTarget test ) {
GenomeLocProcessingTracker tracker = test . getTracker ( ) ;
List < GenomeLoc > shards = test . getShards ( ) ;
logger . warn ( "testMarkedProcesses " + test ) ;
List < GenomeLoc > markedShards = new ArrayList < GenomeLoc > ( ) ;
for ( int i = 0 ; i < shards . size ( ) ; i + + ) {
if ( i % 2 = = 0 ) {
markedShards . add ( shards . get ( i ) ) ;
tracker . claimOwnership ( shards . get ( i ) , NAME_TWO ) ;
}
}
for ( GenomeLoc shard : shards ) {
2011-01-20 20:58:13 +08:00
ProcessingLoc proc = tracker . claimOwnership ( shard , NAME_ONE ) ;
2011-01-14 11:14:24 +08:00
Assert . assertTrue ( proc . isOwnedBy ( NAME_ONE ) | | proc . isOwnedBy ( NAME_TWO ) ) ;
if ( proc . isOwnedBy ( NAME_ONE ) )
2011-01-18 05:23:09 +08:00
Assert . assertTrue ( ! markedShards . contains ( shard ) , "Ran process was already marked!" ) ;
2011-01-14 11:14:24 +08:00
else
2011-01-18 05:23:09 +08:00
Assert . assertTrue ( markedShards . contains ( shard ) , "Unran process wasn't marked" ) ;
2011-01-14 11:14:24 +08:00
if ( ! markedShards . contains ( shard ) ) {
2011-01-25 00:45:07 +08:00
Assert . assertEquals ( tracker . findOwner ( shard , NAME_ONE ) , proc ) ;
2011-01-14 11:14:24 +08:00
}
}
}
public class TestThread implements Callable < Integer > {
public TestTarget test ;
public String name ;
2011-01-18 05:23:09 +08:00
public List < GenomeLoc > ran , toRun ;
2011-01-20 20:58:13 +08:00
boolean useIterator ;
2011-01-14 11:14:24 +08:00
2011-01-20 20:58:13 +08:00
public TestThread ( TestTarget test , int count , List < GenomeLoc > toRun , boolean useIterator ) {
2011-01-14 11:14:24 +08:00
this . test = test ;
2011-01-18 05:23:09 +08:00
this . toRun = toRun ;
2011-01-14 11:14:24 +08:00
this . name = "thread" + count ;
this . ran = new ArrayList < GenomeLoc > ( ) ;
2011-01-20 20:58:13 +08:00
this . useIterator = useIterator ;
2011-01-14 11:14:24 +08:00
}
public Integer call ( ) {
2011-01-20 20:58:13 +08:00
//logger.warn(String.format("Call() Thread %s", name));
if ( useIterator ) {
for ( GenomeLoc shard : test . getTracker ( ) . onlyOwned ( toRun . iterator ( ) , name ) ) {
if ( shard ! = null ) { // ignore the unclaimable end of the stream
ran . add ( shard ) ;
// do some work here
for ( int sum = 0 , i = 0 ; i < 100000 ; i + + ) sum + = i ;
}
}
} else {
for ( GenomeLoc shard : toRun ) {
//System.out.printf("Claiming ownership in %s on %s%n", name, shard);
ProcessingLoc proc = test . getTracker ( ) . claimOwnership ( shard , name ) ;
//System.out.printf(" => ownership of %s is %s (I own? %b)%n", shard, proc.getOwner(), proc.isOwnedBy(name));
if ( proc . isOwnedBy ( name ) ) {
ran . add ( proc . getLocation ( ) ) ;
// do some work here
for ( int sum = 0 , i = 0 ; i < 100000 ; i + + ) sum + = i ;
}
//logger.warn(String.format("Thread %s on %s -> owned by %s", name, shard, proc.getOwner()));
2011-01-14 11:14:24 +08:00
}
}
return 1 ;
}
}
/**
 * Looks up a worker by its owner name.
 *
 * @param name    owner name to search for
 * @param threads workers to search, in order
 * @return the first worker whose name equals the given name, or null when none does
 */
private static TestThread findOwner(String name, List<TestThread> threads) {
    final Iterator<TestThread> candidates = threads.iterator();
    while (candidates.hasNext()) {
        final TestThread candidate = candidates.next();
        if (candidate.name.equals(name)) {
            return candidate;
        }
    }
    return null;
}
2011-01-14 21:09:31 +08:00
private static final < T > void assertAllThreadsFinished ( List < Future < T > > futures ) {
2011-01-18 05:23:09 +08:00
try {
for ( Future f : futures ) {
Assert . assertTrue ( f . isDone ( ) , "Thread never finished running" ) ;
Assert . assertTrue ( f . get ( ) ! = null , "Finished successfully" ) ;
}
} catch ( InterruptedException e ) {
Assert . fail ( "Thread failed to run to completion" , e ) ;
} catch ( ExecutionException e ) {
Assert . fail ( "Thread generated an exception" , e ) ;
}
}
private static final List < GenomeLoc > subList ( List < GenomeLoc > l , int i ) {
List < GenomeLoc > r = new ArrayList < GenomeLoc > ( ) ;
for ( int j = 0 ; j < l . size ( ) ; j + + ) {
if ( j % i = = 0 )
r . add ( l . get ( j ) ) ;
}
return r ;
2011-01-14 21:09:31 +08:00
}
2011-01-20 20:58:13 +08:00
/**
 * Threaded test in which the workers claim shards one at a time through
 * claimOwnership.
 *
 * @param test the tracker configuration under test
 */
@Test(dataProvider = "threadData", enabled = true)
public void testThreadedProcessesLowLevelFunctions(TestTarget test) {
    final boolean useIterator = false;
    testThreading(test, useIterator);
}
/**
 * Threaded test in which the workers claim shards through the tracker's
 * onlyOwned iteration.
 *
 * @param test the tracker configuration under test
 */
@Test(dataProvider = "threadData", enabled = true)
public void testThreadedProcessesIterator(TestTarget test) {
    final boolean useIterator = true;
    testThreading(test, useIterator);
}
private void testThreading ( TestTarget test , boolean useIterator ) {
2011-01-26 21:45:40 +08:00
if ( ! test . isThreadSafe ( ) )
// skip tests that aren't thread safe
return ;
2011-01-14 11:14:24 +08:00
// start up 3 threads
2011-01-20 20:58:13 +08:00
logger . warn ( "ThreadedTesting " + test + " using iterator " + useIterator ) ;
2011-01-14 11:14:24 +08:00
List < TestThread > threads = new ArrayList < TestThread > ( ) ;
for ( int i = 0 ; i < 4 ; i + + ) {
2011-01-18 05:23:09 +08:00
List < GenomeLoc > toRun = subList ( test . getShards ( ) , i + 1 ) ;
2011-01-20 20:58:13 +08:00
TestThread thread = new TestThread ( test , i , toRun , useIterator ) ;
2011-01-14 11:14:24 +08:00
threads . add ( thread ) ;
}
ExecutorService exec = java . util . concurrent . Executors . newFixedThreadPool ( threads . size ( ) ) ;
try {
2011-01-14 21:09:31 +08:00
List < Future < Integer > > results = exec . invokeAll ( threads , 300 , TimeUnit . SECONDS ) ;
2011-01-14 11:14:24 +08:00
GenomeLocProcessingTracker tracker = test . getTracker ( ) ;
List < GenomeLoc > shards = test . getShards ( ) ;
for ( TestThread thread : threads )
2011-01-18 05:23:09 +08:00
logger . warn ( String . format ( "TestThread %s ran %d jobs of %d to run" , thread . name , thread . ran . size ( ) , thread . toRun . size ( ) ) ) ;
2011-01-14 11:14:24 +08:00
2011-01-14 21:09:31 +08:00
assertAllThreadsFinished ( results ) ;
2011-01-14 11:14:24 +08:00
// we ran everything
2011-01-28 11:40:09 +08:00
Assert . assertEquals ( tracker . updateAndGetProcessingLocs ( NAME_ONE ) . size ( ) , shards . size ( ) , "Not all shards were run" ) ;
2011-01-14 11:14:24 +08:00
for ( GenomeLoc shard : shards ) {
2011-01-25 00:45:07 +08:00
Assert . assertTrue ( tracker . locIsOwned ( shard , NAME_ONE ) , "Unowned shard" ) ;
2011-01-14 11:14:24 +08:00
2011-01-25 00:45:07 +08:00
ProcessingLoc proc = tracker . findOwner ( shard , NAME_ONE ) ;
2011-01-14 11:14:24 +08:00
Assert . assertNotNull ( proc , "Proc was null" ) ;
Assert . assertNotNull ( proc . getOwner ( ) , "Owner was null" ) ;
2011-01-20 20:58:13 +08:00
Assert . assertEquals ( proc . getLocation ( ) , shard , "Shard loc doesn't make ProcessingLoc" ) ;
2011-01-14 11:14:24 +08:00
TestThread owner = findOwner ( proc . getOwner ( ) , threads ) ;
Assert . assertNotNull ( owner , "Couldn't find owner" ) ;
Assert . assertTrue ( owner . ran . contains ( shard ) , "Owner doesn't contain ran shard" ) ;
for ( TestThread thread : threads )
2011-01-18 05:23:09 +08:00
if ( ! proc . isOwnedBy ( thread . name ) & & thread . ran . contains ( shard ) )
Assert . fail ( "Shard appears in another run list: proc=" + proc + " shard=" + shard + " also in jobs of " + thread . name + " obj=" + thread . ran . get ( thread . ran . indexOf ( shard ) ) ) ;
2011-01-14 11:14:24 +08:00
}
} catch ( InterruptedException e ) {
Assert . fail ( "Thread failure" , e ) ;
}
}
}