From a5721a8846034bd97be63c9149b124822e79b2b2 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jul 2012 13:38:07 -0400 Subject: [PATCH] Context covariate optimizations were not suited for multiple threads, so I removed them (since that ended up being much, much easier than trying to make the covariates thread local). Added -nt 2 layer to BQSR integration tests to confirm that it now works with multiple threads. --- .../walkers/bqsr/BQSRIntegrationTest.java | 25 +++++++++++++------ .../executive/HierarchicalMicroScheduler.java | 1 - .../gatk/walkers/bqsr/BaseRecalibrator.java | 8 +++--- .../gatk/walkers/bqsr/ContextCovariate.java | 25 ++++++++----------- 4 files changed, 32 insertions(+), 27 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 1d506b88c..e33c8c6f0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -28,6 +28,17 @@ public class BQSRIntegrationTest extends WalkerTest { this.md5 = md5; } + public String getCommandLine() { + return " -T BaseRecalibrator" + + " -R " + reference + + " -I " + bam + + " -L " + interval + + args + + " --no_plots" + + " -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) + + " -o %s"; + } + @Override public String toString() { return String.format("BQSR(bam='%s', args='%s')", bam, args); @@ -59,16 +70,14 @@ public class BQSRIntegrationTest extends WalkerTest { @Test(dataProvider = "BQSRTest") public void testBQSR(BQSRTest params) { WalkerTestSpec spec = new WalkerTestSpec( - " -T BaseRecalibrator" + - " -R " + params.reference + - " -I " + params.bam + - " -L " + params.interval + - params.args + - " --no_plots" + - " -knownSites " + (params.reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) + - " -o %s", + params.getCommandLine(), Arrays.asList(params.md5)); executeTest("testBQSR-"+params.args, spec).getFirst(); + + WalkerTestSpec specNT2 = new WalkerTestSpec( + params.getCommandLine() + " -nt 2", + Arrays.asList(params.md5)); + executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst(); } @Test diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 1cea14a9d..1fe2b840f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 9f5429fb9..4cba67909 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -111,16 +111,16 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization private RecalibrationTables recalibrationTables; - + private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) private RecalibrationEngine recalibrationEngine; private int minimumQToUse; - protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. - protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. - protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. + protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. + protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 8bc58063d..5fe8809fb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -51,10 +51,6 @@ public class ContextCovariate implements StandardCovariate { private static final int LENGTH_BITS = 4; private static final int LENGTH_MASK = 15; - // temporary lists to use for creating context covariate keys - private final ArrayList mismatchKeys = new ArrayList(200); - private final ArrayList indelKeys = new ArrayList(200); - // the maximum context size (number of bases) permitted; we need to keep the leftmost base free so that values are // not negative and we reserve 4 more bits to represent the length of the context; it takes 2 bits to encode one base. static final private int MAX_DNA_CONTEXT = 13; @@ -91,10 +87,8 @@ public class ContextCovariate implements StandardCovariate { if (negativeStrand) bases = BaseUtils.simpleReverseComplement(bases); - mismatchKeys.clear(); - indelKeys.clear(); - contextWith(bases, mismatchesContextSize, mismatchKeys, mismatchesKeyMask); - contextWith(bases, indelsContextSize, indelKeys, indelsKeyMask); + final ArrayList mismatchKeys = contextWith(bases, mismatchesContextSize, mismatchesKeyMask); + final ArrayList indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask); final int readLength = bases.length; for (int i = 0; i < readLength; i++) { @@ -139,17 +133,19 @@ public class ContextCovariate implements StandardCovariate { * * @param bases the bases in the read to build the context from * @param contextSize context size to use building the context - * @param keys list to store the keys * @param mask mask for pulling out just the context bits */ - private static void contextWith(final byte[] bases, final int contextSize, final ArrayList keys, final int mask) { + private static ArrayList contextWith(final byte[] bases, final int contextSize, final int mask) { + + final int readLength = bases.length; + final ArrayList keys = new ArrayList(readLength); // the first contextSize-1 bases will not have enough previous context - for (int i = 1; i < contextSize && i <= bases.length; i++) + for (int i = 1; i < contextSize && i <= readLength; i++) keys.add(-1); - if (bases.length < contextSize) - return; + if (readLength < contextSize) + return keys; final int newBaseOffset = 2 * (contextSize - 1) + LENGTH_BITS; @@ -171,7 +167,6 @@ public class ContextCovariate implements StandardCovariate { } } - final int readLength = bases.length; for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) { final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]); if (baseIndex == -1) { // ignore non-ACGT bases @@ -191,6 +186,8 @@ public class ContextCovariate implements StandardCovariate { keys.add(-1); } } + + return keys; } public static int keyFromContext(final String dna) {