Fix more tests that fail when run in parallel on the farm

-Allow the default S3 put timeout of 30 seconds for GATKRunReports
 to be overridden via a constructor argument, and use a timeout
 of 300 seconds for tests. The timeout remains 30 seconds in all
 other cases.

-Change integration tests that themselves dispatch farm jobs
 into pipeline tests. Necessary because some farm nodes are
 not set up as submit hosts. Pipeline tests are still run
 directly on gsa4.

-Bump up the timeout for the MaxRuntimeIntegrationTest even more
 (was still occasionally failing on the farm!)
This commit is contained in:
David Roazen 2013-03-12 13:41:29 -04:00
parent dcdd6e3e60
commit cdb1fa1105
5 changed files with 39 additions and 15 deletions

View File

@ -78,17 +78,11 @@ public class GATKRunReport {
private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH.mm.ss");
/**
* number of milliseconds before the S3 put operation is timed-out:
*/
private static final long S3_PUT_TIME_OUT = 30 * 1000;
/**
* The root file system directory where we keep common report data
*/
private final static File REPORT_DIR = new File("/humgen/gsa-hpprojects/GATK/reports");
/**
* The full path to the directory where submitted (and uncharacterized) report files are written
*/
@ -105,6 +99,17 @@ public class GATKRunReport {
*/
protected static final Logger logger = Logger.getLogger(GATKRunReport.class);
/**
* Default value for the number of milliseconds before an S3 put operation is timed-out.
* Can be overridden via a constructor argument.
*/
private static final long S3_DEFAULT_PUT_TIME_OUT_IN_MILLISECONDS = 30 * 1000;
/**
* Number of milliseconds before an S3 put operation is timed-out.
*/
private long s3PutTimeOutInMilliseconds = S3_DEFAULT_PUT_TIME_OUT_IN_MILLISECONDS;
// -----------------------------------------------------------------
// elements captured for the report
// -----------------------------------------------------------------
@ -230,13 +235,31 @@ public class GATKRunReport {
}
/**
* Create a new RunReport and population all of the fields with values from the walker and engine
* Create a new RunReport and populate all of the fields with values from the walker and engine.
* Allows the S3 put timeout to be explicitly set.
*
* @param walker the GATK walker that we ran
* @param e the exception caused by running this walker, or null if we completed successfully
* @param engine the GAE we used to run the walker, so we can fetch runtime, args, etc
* @param type the GATK phone home setting
* @param s3PutTimeOutInMilliseconds number of milliseconds to wait before timing out an S3 put operation
*/
public GATKRunReport(Walker<?,?> walker, Exception e, GenomeAnalysisEngine engine, PhoneHomeOption type) {
public GATKRunReport(final Walker<?,?> walker, final Exception e, final GenomeAnalysisEngine engine, final PhoneHomeOption type,
final long s3PutTimeOutInMilliseconds) {
// Delegate all report construction to the four-argument constructor; the only
// difference here is that the S3 put timeout field is then overwritten with the
// caller-supplied value instead of keeping its initial value.
this(walker, e, engine, type);
this.s3PutTimeOutInMilliseconds = s3PutTimeOutInMilliseconds;
}
/**
* Create a new RunReport and populate all of the fields with values from the walker and engine.
* Leaves the S3 put timeout set to the default value of S3_DEFAULT_PUT_TIME_OUT_IN_MILLISECONDS.
*
* @param walker the GATK walker that we ran
* @param e the exception caused by running this walker, or null if we completed successfully
* @param engine the GAE we used to run the walker, so we can fetch runtime, args, etc
* @param type the GATK phone home setting
*/
public GATKRunReport(final Walker<?,?> walker, final Exception e, final GenomeAnalysisEngine engine, final PhoneHomeOption type) {
if ( type == PhoneHomeOption.NO_ET )
throw new ReviewedStingException("Trying to create a run report when type is NO_ET!");
@ -563,7 +586,7 @@ public class GATKRunReport {
throw new IllegalStateException("We are throwing an exception for testing purposes");
case TIMEOUT:
try {
Thread.sleep(S3_PUT_TIME_OUT * 100);
Thread.sleep(s3PutTimeOutInMilliseconds * 100);
} catch ( InterruptedException e ) {
// supposed to be empty
}
@ -625,7 +648,7 @@ public class GATKRunReport {
s3thread.setName("S3Put-Thread");
s3thread.start();
s3thread.join(S3_PUT_TIME_OUT);
s3thread.join(s3PutTimeOutInMilliseconds);
if(s3thread.isAlive()){
s3thread.interrupt();

View File

@ -39,7 +39,8 @@ import java.util.concurrent.TimeUnit;
*
*/
public class MaxRuntimeIntegrationTest extends WalkerTest {
private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(120, TimeUnit.SECONDS);
// Assume a ridiculous amount of startup overhead to allow for running these tests on slow farm nodes
private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(300, TimeUnit.SECONDS);
private class MaxRuntimeTestProvider extends TestDataProvider {
final long maxRuntime;
@ -68,7 +69,7 @@ public class MaxRuntimeIntegrationTest extends WalkerTest {
//
// Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type
//
@Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 300 * 1000)
@Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 600 * 1000)
public void testMaxRuntime(final MaxRuntimeTestProvider cfg) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T PrintReads -R " + hg18Reference

View File

@ -34,7 +34,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
public class JnaSessionIntegrationTest extends BaseTest {
public class JnaSessionPipelineTest extends BaseTest {
private String implementation = null;
private static final SessionFactory factory = new JnaSessionFactory();

View File

@ -40,7 +40,7 @@ import java.io.File;
import java.util.Arrays;
import java.util.List;
public class LibDrmaaIntegrationTest extends BaseTest {
public class LibDrmaaPipelineTest extends BaseTest {
private String implementation = null;
@Test

View File

@ -40,7 +40,7 @@ import java.io.File;
/**
* Really unit tests, but these tests will only run on systems with LSF setup.
*/
public class LibBatIntegrationTest extends BaseTest {
public class LibBatPipelineTest extends BaseTest {
@BeforeClass
public void initLibBat() {
Assert.assertFalse(LibBat.lsb_init("LibBatIntegrationTest") < 0, LibBat.lsb_sperror("lsb_init() failed"));