From b5fa8482554cb53a4304c032111915320e1ac18f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 20 Sep 2012 18:44:32 -0400 Subject: [PATCH] Fix GSA-515 Nanoscheduler GSA-573 -nt and -nct interact badly w.r.t. output -- See https://jira.broadinstitute.org/browse/GSA-573 -- Uses InheritedThreadLocal storage so that children threads created by the NanoScheduler see the parent stubs in the main thread. -- Added explicit integration test that checks that -nt 1, 2 and -nct 1, 2 give the same results for GLM BOTH with the UG over 1 MB. --- .../sting/gatk/executive/ShardTraverser.java | 4 ++ .../gatk/io/ThreadLocalOutputTracker.java | 39 ++++++++++---- .../NanoSchedulerIntegrationTest.java | 52 +++++++++++++++++++ 3 files changed, 86 insertions(+), 9 deletions(-) create mode 100755 public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index d632892d5..e6f539614 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -55,6 +55,10 @@ public class ShardTraverser implements Callable { try { final long startTime = System.currentTimeMillis(); + // this is CRITICAL -- initializes the thread-local output maps in the parent thread, + // so that any subthreads created by the traversal itself are shared... + outputTracker.getStorageAndInitializeIfNecessary(); + Object accumulator = walker.reduceInit(); final WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(), microScheduler.getReadIterator(shard), diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java index 636787c69..e1e42a9a1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java @@ -39,14 +39,17 @@ import java.util.Map; /** * An output tracker that can either track its output per-thread or directly, * - * @author mhanna - * @version 0.1 + * @author mhanna, depristo + * @version 0.2 */ public class ThreadLocalOutputTracker extends OutputTracker { /** * Thread-local storage for output streams. + * + * MUST BE A INHERITABLE THREAD LOCAL + * -- NanoScheduler creates subthreads, and these threads must inherit the binding from their parent */ - private ThreadLocal> storage = new ThreadLocal>(); + private ThreadLocal> storage = new InheritableThreadLocal>(); /** * A total hack. If bypass = true, bypass thread local storage and write directly @@ -57,6 +60,29 @@ public class ThreadLocalOutputTracker extends OutputTracker { this.bypass = bypass; } + /** + * Initialize the storage map for this thread, if necessary. + * + * Checks if there's a thread local binding for this thread, and if + * not initializes it. + * + * Particularly useful in the case where we want to initialize the map in + * a parent thread but have it used available to all the children via + * the InheritedThreadLocal map. + * + * @return the storage + */ + public Map getStorageAndInitializeIfNecessary() { + Map threadLocalOutputStreams = storage.get(); + + if( threadLocalOutputStreams == null ) { + threadLocalOutputStreams = new HashMap(); + storage.set( threadLocalOutputStreams ); + } + + return threadLocalOutputStreams; + } + public T getStorage( Stub stub ) { Storage target; @@ -68,12 +94,7 @@ public class ThreadLocalOutputTracker extends OutputTracker { } } else { - Map threadLocalOutputStreams = storage.get(); - - if( threadLocalOutputStreams == null ) { - threadLocalOutputStreams = new HashMap(); - storage.set( threadLocalOutputStreams ); - } + final Map threadLocalOutputStreams = getStorageAndInitializeIfNecessary(); target = threadLocalOutputStreams.get(stub); if( target == null ) { diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java new file mode 100755 index 000000000..9318b6dce --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.utils.nanoScheduler; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +// ********************************************************************************** // +// Note that this class also serves as an integration test for the VariantAnnotator! // +// ********************************************************************************** // + +public class NanoSchedulerIntegrationTest extends WalkerTest { + @DataProvider(name = "NanoSchedulerUGTest") + public Object[][] createNanoSchedulerUGTest() { + List tests = new ArrayList(); + + for ( final int nt : Arrays.asList(1, 2) ) + for ( final int nct : Arrays.asList(1, 2) ) { +// tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); +// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); + tests.add(new Object[]{ "BOTH", "1eaf8ac30cdefd573850e58c1ec38790", nt, nct }); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "NanoSchedulerUGTest") + private void testNanoSchedulerUGTest(final String glm, final String md5, final int nt, final int nct ) { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T UnifiedGenotyper -R " + b37KGReference, + "-nosl --no_cmdline_in_header -G", + //"--dbsnp " + b37dbSNP132, + "-I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", + "-L 20:10,000,000-11,000,000", + "-glm " + glm, + "-nt " + nt, + "-nct " + nct, + "-o %s" + ), + 1, + Arrays.asList(md5) + ); + executeTest(String.format("testUG-glm:%s-nt%d-nct%d", glm, nt, nct), spec); + } + + + +}