From cc5d695bcf3574f0cadf4d5e5a1b395532c20bae Mon Sep 17 00:00:00 2001 From: kshakir Date: Sat, 5 Feb 2011 00:06:12 +0000 Subject: [PATCH] Renamed the IPFL Test to IPFL PipelineTest so that it'll be picked up by the PipelineTests. HACK: Turned off JNA autoRead() in the jobInfoEnt LSF structure to try and dodge the SIGSEGV during strlen calls during bmods. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5201 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/jna/lsf/v7_0_6/LibBat.java | 53 ++++++++++++++++++- ...est.scala => IPFLibraryPipelineTest.scala} | 5 +- 2 files changed, 54 insertions(+), 4 deletions(-) rename scala/test/org/broadinstitute/sting/queue/pipeline/{IPFLibraryTest.scala => IPFLibraryPipelineTest.scala} (95%) diff --git a/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBat.java b/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBat.java index 1beb8fb86..c301fffd5 100644 --- a/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBat.java +++ b/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBat.java @@ -5050,6 +5050,55 @@ public class LibBat { } + /** + * HACK: A version of the submit structure without autoread, so that + * jobInfoEnt doesn't try to populate the structure on return from lsb_readjobinfo. + * There are several reports of JVM crashes (SIGSEGV) in strlen after a call to lsb_readjobinfo during the autoRead(). 
+ * + * Example: + + Current thread (0x0000000050efd800): JavaThread "main" [_thread_in_native, id=22268, stack(0x0000000040dbf000,0x0000000040ec0000)] + + siginfo:si_signo=SIGSEGV: si_errno=0, si_code=128 (), si_addr=0x0000000000000000 + + Stack: [0x0000000040dbf000,0x0000000040ec0000], sp=0x0000000040ebc018, free space=3f40000000000000018k + Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code) + C [libc.so.6+0x797c0] strlen+0x10 + j com.sun.jna.Pointer._getString(JZ)Ljava/lang/String;+0 + j com.sun.jna.Pointer.getString(JZ)Ljava/lang/String;+7 + j com.sun.jna.Pointer.getString(J)Ljava/lang/String;+90 + j com.sun.jna.Pointer.getValue(JLjava/lang/Class;Ljava/lang/Object;)Ljava/lang/Object;+630 + j com.sun.jna.Structure.readField(Lcom/sun/jna/Structure$StructField;)Ljava/lang/Object;+168 + j com.sun.jna.Structure.read()V+82 + j com.sun.jna.Structure.autoRead()V+8 + j com.sun.jna.Structure.updateStructureByReference(Ljava/lang/Class;Lcom/sun/jna/Structure;Lcom/sun/jna/Pointer;)Lcom/sun/jna/Structure;+68 + j com.sun.jna.Pointer.getValue(JLjava/lang/Class;Ljava/lang/Object;)Ljava/lang/Object;+74 + j com.sun.jna.Structure.readField(Lcom/sun/jna/Structure$StructField;)Ljava/lang/Object;+168 + j com.sun.jna.Structure.read()V+82 + j com.sun.jna.Structure.autoRead()V+8 + v ~StubRoutines::call_stub + V [libjvm.so+0x3e756d] + V [libjvm.so+0x5f6f59] + V [libjvm.so+0x3e73a5] + V [libjvm.so+0x420904] + V [libjvm.so+0x400ea5] + C [jna1670124220621463742.tmp+0x6feb] newJavaStructure+0xdb + C [jna1670124220621463742.tmp+0xb919] + C [jna1670124220621463742.tmp+0x11008] ffi_closure_unix64_inner+0x88 + C [jna1670124220621463742.tmp+0x11438] ffi_closure_unix64+0x46 + j org.broadinstitute.sting.queue.engine.Lsf706JobRunner.status()Lscala/Enumeration$Value;+36 + j org.broadinstitute.sting.queue.engine.FunctionEdge.status()Lscala/Enumeration$Value;+72 + j 
org.broadinstitute.sting.queue.engine.QGraph$$anonfun$getReadyJobs$1.apply(Lorg/broadinstitute/sting/queue/engine/QEdge;)Z+44 + j org.broadinstitute.sting.queue.engine.QGraph$$anonfun$getReadyJobs$1.apply(Ljava/lang/Object;)Ljava/lang/Object;+5 + + * Because the error is in the second level call to autoRead(), and also in a structure that has a String, we are assuming + * that the error is in the submit structure even though this problem is very hard to reproduce consistently at the moment. + */ + public static class submitWithoutAutoRead extends submit { + public submitWithoutAutoRead() { + this.setAutoRead(false); + } + } /** @@ -6096,6 +6145,7 @@ public class LibBat { /** * \brief job information entry. + * HACK: The submit value in this structure currently has autoRead() set to false as a possible workaround for a SIGSEGV error. */ public static class jobInfoEnt extends Structure { public static class ByReference extends jobInfoEnt implements Structure.ByReference { @@ -6242,8 +6292,9 @@ public class LibBat { /** * < Structure for \ref lsb_submit call. + * HACK: Use a structure that has the same size, but has autoRead turned off, in the hope of working around the SIGSEGV in strlen. */ - public submit submit; + public submitWithoutAutoRead submit; /** * < Job exit status. 
diff --git a/scala/test/org/broadinstitute/sting/queue/pipeline/IPFLibraryTest.scala b/scala/test/org/broadinstitute/sting/queue/pipeline/IPFLibraryPipelineTest.scala similarity index 95% rename from scala/test/org/broadinstitute/sting/queue/pipeline/IPFLibraryTest.scala rename to scala/test/org/broadinstitute/sting/queue/pipeline/IPFLibraryPipelineTest.scala index 2bbc0b1b2..6b32e5c9d 100755 --- a/scala/test/org/broadinstitute/sting/queue/pipeline/IPFLibraryTest.scala +++ b/scala/test/org/broadinstitute/sting/queue/pipeline/IPFLibraryPipelineTest.scala @@ -1,10 +1,9 @@ package org.broadinstitute.sting.queue.pipeline import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest -class IPFLibraryTest { +class IPFLibraryPipelineTest { @Test def testVCFExtractSites { @@ -69,4 +68,4 @@ class IPFLibraryTest { spec.fileMD5s += testOut -> "ee09af803bc94987d55d044c2ebbc0b8" } -} \ No newline at end of file +}