diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 7a25e8bc8..8d86da273 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -417,7 +417,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In */ @Hidden @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false) - public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING; + public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING; @Hidden @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) @@ -639,6 +639,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In logger.info("Using global mismapping rate of " + phredScaledGlobalReadMismappingRate + " => " + log10GlobalReadMismappingRate + " in log10 likelihood units"); } + //static member function - set number of threads + PairHMM.setNumberOfThreads(getToolkit().getTotalNumberOfThreads()); // create our likelihood calculation engine likelihoodCalculationEngine = createLikelihoodCalculationEngine(); diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java index f039cc295..e6b9f0469 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java @@ -58,6 +58,7 @@ import java.util.HashMap; */ public abstract class JNILoglessPairHMM extends LoglessPairHMM { public abstract HashMap getHaplotypeToHaplotypeListIdxMap(); - protected long setupTime = 0; + protected long threadLocalSetupTimeDiff = 0; + protected static long pairHMMSetupTime = 0; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java index e69d9ea50..9d46dac80 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java @@ -228,7 +228,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results if(doProfiling) - setupTime += (System.nanoTime() - startTime); + threadLocalSetupTimeDiff = (System.nanoTime() - startTime); //for(reads) // for(haplotypes) // compute_full_prob() @@ -251,7 +251,14 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { readIdx += numHaplotypes; } if(doProfiling) - computeTime += (System.nanoTime() - startTime); + { + threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime); + //synchronized(doProfiling) + { + pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff; + pairHMMSetupTime += threadLocalSetupTimeDiff; + } + } return likelihoodMap; } @@ -262,7 +269,8 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { @Override public void close() { - System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9)); + if(doProfiling) + System.out.println("Time spent in setup for JNI call : "+(pairHMMSetupTime*1e-9)); super.close(); jniClose(); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 287cd45d0..7776e979a 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0); final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth"; - final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011")); + final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("214799b5d1c0809f042c557155ea6e13")); specAnn.disableShadowBCF(); final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index ec4291e1d..7598a0c96 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d1626d5fff419b8a782de954224e881f"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 55aaaab5b..b16aa38b7 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("7592274ecd2b5ef4624dd2ed659536ef")); + Arrays.asList("1b41bf83b9c249a9a5fffb1e308668c1")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } diff --git a/public/VectorPairHMM/README.md b/public/VectorPairHMM/README.md index 85cc0a04a..ad4005526 100644 --- a/public/VectorPairHMM/README.md +++ b/public/VectorPairHMM/README.md @@ -44,9 +44,10 @@ end of every region (line 351 in PairHMMLikelihoodCalculationEngine). Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java. Compiling: -Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX -intrinsics. -This native library is called libVectorLoglessPairHMM.so +The native library (called libVectorLoglessPairHMM.so) can be compiled with icc (Intel C compiler) +or gcc versions >= 4.8.1 that support AVX intrinsics. By default, the make process tries to invoke +icc. To use gcc, edit the file 'pom.xml' (in this directory) and enable the environment variables +USE_GCC,C_COMPILER and CPP_COMPILER (edit and uncomment lines 60-62). Using Maven: Type 'mvn install' in this directory - this will build the library (by invoking 'make') and copy the native library to the directory diff --git a/public/VectorPairHMM/pom.xml b/public/VectorPairHMM/pom.xml index 41bb73211..2b69e5173 100644 --- a/public/VectorPairHMM/pom.xml +++ b/public/VectorPairHMM/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-root - 2.8-SNAPSHOT + 3.2-SNAPSHOT ../../public/sting-root @@ -57,6 +57,9 @@ src/main/c++ ${java.home} + + + ${project.build.directory} diff --git a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc index 0e3026f65..52accef3d 100644 --- a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc +++ b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc @@ -23,8 +23,8 @@ */ -#include "LoadTimeInitializer.h" #include "utils.h" +#include "LoadTimeInitializer.h" using namespace std; char* LoadTimeInitializerStatsNames[] = { @@ -43,6 +43,10 @@ LoadTimeInitializer g_load_time_initializer; LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded { +#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__INTEL_COMPILER) + //compiles only with gcc >= 4.8 + __builtin_cpu_init(); +#endif ConvertChar::init(); #ifndef DISABLE_FTZ //Very important to get good performance on Intel processors @@ -71,11 +75,6 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa m_filename_to_fptr.clear(); m_written_files_set.clear(); - //Common buffer - 8MB - unsigned size = 1024*1024; - m_buffer = new uint64_t[size]; - m_buffer_size = size*sizeof(uint64_t); - initialize_function_pointers(); //Initialize static members of class diff --git a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h index 8e45a4d94..d8e9aeeb4 100644 --- a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h +++ b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h @@ -27,7 +27,7 @@ #define LOAD_TIME_INITIALIZER_H #include "headers.h" #include -#include "template.h" +/*#include "template.h"*/ enum LoadTimeInitializerStatsEnum { @@ -47,10 +47,6 @@ class LoadTimeInitializer { public: LoadTimeInitializer(); //will be called when library is loaded - ~LoadTimeInitializer() - { - delete[] m_buffer; - } void print_profiling(); void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); void debug_close(); @@ -72,8 +68,6 @@ class LoadTimeInitializer uint64_t m_data_transfer_time; //bytes copied uint64_t m_bytes_copied; - unsigned get_buffer_size() { return m_buffer_size; } - char* get_buffer() { return (char*)m_buffer; } private: std::map m_filename_to_fptr; std::set m_written_files_set; @@ -83,8 +77,6 @@ class LoadTimeInitializer double m_sum_square_stats[TOTAL_NUMBER_STATS]; uint64_t m_min_stats[TOTAL_NUMBER_STATS]; uint64_t m_max_stats[TOTAL_NUMBER_STATS]; - unsigned m_buffer_size; - uint64_t* m_buffer; }; extern LoadTimeInitializer g_load_time_initializer; diff --git a/public/VectorPairHMM/src/main/c++/Makefile b/public/VectorPairHMM/src/main/c++/Makefile index 354bca0bb..57fcc4e6d 100644 --- a/public/VectorPairHMM/src/main/c++/Makefile +++ b/public/VectorPairHMM/src/main/c++/Makefile @@ -32,14 +32,30 @@ JRE_HOME?=/opt/jdk1.7.0_25/jre JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux -COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -CC=icc -CXX=icc +#COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas +COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -Wall $(OMPCFLAGS) -Wno-unknown-pragmas -Wno-write-strings -Wno-unused-variable -Wno-unused-but-set-variable +ifdef DISABLE_FTZ + COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ +endif + +ifdef USE_GCC + C_COMPILER?=gcc + CPP_COMPILER?=g++ + AVX_FLAGS=-mavx + SSE41_FLAGS=-msse4.1 + COMMON_COMPILATION_FLAGS+=-Wno-char-subscripts +else + C_COMPILER?=icc + CPP_COMPILER?=icc + AVX_FLAGS=-xAVX + SSE41_FLAGS=-xSSE4.1 + LIBFLAGS=-static-intel + ifdef DISABLE_FTZ + COMMON_COMPILATION_FLAGS+=-no-ftz + endif +endif LDFLAGS=-lm -lrt $(OMPLDFLAGS) -ifdef DISABLE_FTZ - COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz -endif PAPI_DIR=/home/karthikg/softwares/papi-5.3.0 ifdef USE_PAPI @@ -49,10 +65,6 @@ ifdef USE_PAPI endif endif -ifdef DISABLE_FTZ - COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz -endif - BIN=libVectorLoglessPairHMM.so pairhmm-template-main checker #BIN=checker @@ -79,8 +91,8 @@ NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o) AVX_OBJECTS=$(AVX_SOURCES:.cc=.o) SSE_OBJECTS=$(SSE_SOURCES:.cc=.o) $(NO_VECTOR_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX -$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2 +$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) $(AVX_FLAGS) +$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) $(SSE41_FLAGS) OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS) all: $(BIN) Sandbox.class copied_lib @@ -88,18 +100,18 @@ all: $(BIN) Sandbox.class copied_lib -include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d)) checker: pairhmm-1-base.o $(COMMON_OBJECTS) - $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) + $(CPP_COMPILER) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS) - $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) + $(CPP_COMPILER) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) libVectorLoglessPairHMM.so: $(LIBOBJECTS) - $(CXX) $(OMPLFLAGS) -shared -static-intel -o $@ $(LIBOBJECTS) ${LDFLAGS} + $(CPP_COMPILER) $(OMPLFLAGS) -shared $(LIBFLAGS) -o $@ $(LIBOBJECTS) ${LDFLAGS} $(OBJECTS): %.o: %.cc @mkdir -p $(DEPDIR) - $(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $< + $(CPP_COMPILER) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $< Sandbox.class: Sandbox.java javac Sandbox.java diff --git a/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc b/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc index 6d90d5070..1a6b593c2 100644 --- a/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc +++ b/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc @@ -23,7 +23,6 @@ */ -#include "template.h" #undef SIMD_ENGINE #undef SIMD_ENGINE_SSE @@ -31,6 +30,8 @@ #define SIMD_ENGINE avx #define SIMD_ENGINE_AVX +#include "template.h" + #include "define-float.h" #include "shift_template.c" #include "pairhmm-template-kernel.cc" diff --git a/public/VectorPairHMM/src/main/c++/baseline.cc b/public/VectorPairHMM/src/main/c++/baseline.cc index a2dd8d329..17d2c279e 100644 --- a/public/VectorPairHMM/src/main/c++/baseline.cc +++ b/public/VectorPairHMM/src/main/c++/baseline.cc @@ -24,7 +24,7 @@ #include "headers.h" -#include "template.h" +#include "common_data_structure.h" #include "utils.h" #include "LoadTimeInitializer.h" using namespace std; @@ -48,17 +48,8 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log) //a straightforward array of pointers ensures that all data lies 'close' in memory, increasing //the chance of being stored together in the cache. Also, prefetchers can learn memory access //patterns for 2D arrays, not possible for array of pointers - //bool locally_allocated = false; //NUMBER* common_buffer = 0; NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6]; - //unsigned curr_size = sizeof(NUMBER)*(3*ROWS*COLS + ROWS*6); - //if(true) - //{ - //common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6]; - //locally_allocated = true; - //} - //else - //common_buffer = (NUMBER*)(g_load_time_initializer.get_buffer()); //pointers to within the allocated buffer NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS]; NUMBER* ptr = common_buffer; @@ -154,7 +145,6 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log) #ifndef USE_STACK_ALLOCATION delete[] common_pointer_buffer; - //if(locally_allocated) delete[] common_buffer; #endif diff --git a/public/VectorPairHMM/src/main/c++/common_data_structure.h b/public/VectorPairHMM/src/main/c++/common_data_structure.h new file mode 100644 index 000000000..4b5f0341c --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/common_data_structure.h @@ -0,0 +1,215 @@ +#ifndef COMMON_DATA_STRUCTURE_H +#define COMMON_DATA_STRUCTURE_H + +#include "headers.h" + +#define CAT(X,Y) X####Y +#define CONCAT(X,Y) CAT(X,Y) + +#define MM 0 +#define GapM 1 +#define MX 2 +#define XX 3 +#define MY 4 +#define YY 5 + +//#define MROWS 500 +//#define MCOLS 1000 + + +#define MAX_QUAL 254 +#define MAX_JACOBIAN_TOLERANCE 8.0 +#define JACOBIAN_LOG_TABLE_STEP 0.0001 +#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP) +#define MAXN 70000 +#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients +#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1) + +template +struct ContextBase +{ + public: + NUMBER ph2pr[128]; + NUMBER INITIAL_CONSTANT; + NUMBER LOG10_INITIAL_CONSTANT; + NUMBER RESULT_THRESHOLD; + + static bool staticMembersInitializedFlag; + static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; + static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; + + static void initializeStaticMembers() + { + if(!staticMembersInitializedFlag) + { + //Order of calls important - Jacobian first, then MatchToMatch + initializeJacobianLogTable(); + initializeMatchToMatchProb(); + staticMembersInitializedFlag = true; + } + } + + static void deleteStaticMembers() + { + if(staticMembersInitializedFlag) + { + staticMembersInitializedFlag = false; + } + } + + //Called only once during library load - don't bother to optimize with single precision fp + static void initializeJacobianLogTable() + { + for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { + jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP))); + } + } + + //Called only once per library load - don't bother optimizing with single fp + static void initializeMatchToMatchProb() + { + double LN10 = log(10); + double INV_LN10 = 1.0/LN10; + for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i) + for (int j = 0; j <= i; j++) { + double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j); + double matchToMatchLog10 = + log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10; + matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10)); + } + } + //Called during computation - use single precision where possible + static int fastRound(NUMBER d) { + return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5)); + } + //Called during computation - use single precision where possible + static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) { + // make sure small is really the smaller value + if (small > big) { + NUMBER t = big; + big = small; + small = t; + } + + if (isinf(small) == -1 || isinf(big) == -1) + return big; + + NUMBER diff = big - small; + if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE)) + return big; + + // OK, so |y-x| < tol: we use the following identity then: + // we need to compute log10(10^x + 10^y) + // By Jacobian logarithm identity, this is equal to + // max(x,y) + log10(1+10^-abs(x-y)) + // we compute the second term as a table lookup with integer quantization + // we have pre-stored correction for 0,0.1,0.2,... 10.0 + int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding + return big + jacobianLogTable[ind]; + } +}; + +template +struct Context : public ContextBase +{}; + +template<> +struct Context : public ContextBase +{ + Context():ContextBase() + { + for (int x = 0; x < 128; x++) + ph2pr[x] = pow(10.0, -((double)x) / 10.0); + + INITIAL_CONSTANT = ldexp(1.0, 1020.0); + LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT); + RESULT_THRESHOLD = 0.0; + } + + double LOG10(double v){ return log10(v); } + inline double POW(double b, double e) { return pow(b,e); } + + static double _(double n){ return n; } + static double _(float n){ return ((double) n); } +}; + +template<> +struct Context : public ContextBase +{ + Context() : ContextBase() + { + for (int x = 0; x < 128; x++) + { + ph2pr[x] = powf(10.f, -((float)x) / 10.f); + } + + INITIAL_CONSTANT = ldexpf(1.f, 120.f); + LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT); + RESULT_THRESHOLD = ldexpf(1.f, -110.f); + } + + float LOG10(float v){ return log10f(v); } + inline float POW(float b, float e) { return powf(b,e); } + + static float _(double n){ return ((float) n); } + static float _(float n){ return n; } +}; + +#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \ +{ \ + int minQual = delQual; \ + int maxQual = insQual; \ + if (insQual <= delQual) \ + { \ + minQual = insQual; \ + maxQual = delQual; \ + } \ + (output) = (MAX_QUAL < maxQual) ? \ + ((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \ + : ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \ +} + + + +typedef struct +{ + int rslen, haplen; + /*int *q, *i, *d, *c;*/ + /*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/ + char *q, *i, *d, *c; + char *hap, *rs; + int *ihap; + int *irs; +} testcase; + +#define MIN_ACCEPTED 1e-28f +#define NUM_DISTINCT_CHARS 5 +#define AMBIG_CHAR 4 + +class ConvertChar { + + static uint8_t conversionTable[255] ; + +public: + + static void init() { + assert (NUM_DISTINCT_CHARS == 5) ; + assert (AMBIG_CHAR == 4) ; + + conversionTable['A'] = 0 ; + conversionTable['C'] = 1 ; + conversionTable['T'] = 2 ; + conversionTable['G'] = 3 ; + conversionTable['N'] = 4 ; + } + + static inline uint8_t get(uint8_t input) { + return conversionTable[input] ; + } + +}; + +int normalize(char c); +int read_testcase(testcase *tc, FILE* ifp=0); + +#endif diff --git a/public/VectorPairHMM/src/main/c++/define-double.h b/public/VectorPairHMM/src/main/c++/define-double.h index 2067d369c..96fc274c4 100644 --- a/public/VectorPairHMM/src/main/c++/define-double.h +++ b/public/VectorPairHMM/src/main/c++/define-double.h @@ -42,33 +42,33 @@ #undef MASK_TYPE #undef MASK_ALL_ONES -#undef SET_VEC_ZERO(__vec) -#undef VEC_OR(__v1, __v2) -#undef VEC_ADD(__v1, __v2) -#undef VEC_SUB(__v1, __v2) -#undef VEC_MUL(__v1, __v2) -#undef VEC_DIV(__v1, __v2) -#undef VEC_BLEND(__v1, __v2, __mask) -#undef VEC_BLENDV(__v1, __v2, __maskV) -#undef VEC_CAST_256_128(__v1) -#undef VEC_EXTRACT_128(__v1, __im) -#undef VEC_EXTRACT_UNIT(__v1, __im) -#undef VEC_SET1_VAL128(__val) -#undef VEC_MOVE(__v1, __val) -#undef VEC_CAST_128_256(__v1) -#undef VEC_INSERT_VAL(__v1, __val, __pos) -#undef VEC_CVT_128_256(__v1) -#undef VEC_SET1_VAL(__val) -#undef VEC_POPCVT_CHAR(__ch) -#undef VEC_LDPOPCVT_CHAR(__addr) -#undef VEC_CMP_EQ(__v1, __v2) -#undef VEC_SET_LSE(__val) -#undef SHIFT_HAP(__v1, __val) +#undef SET_VEC_ZERO +#undef VEC_OR +#undef VEC_ADD +#undef VEC_SUB +#undef VEC_MUL +#undef VEC_DIV +#undef VEC_BLEND +#undef VEC_BLENDV +#undef VEC_CAST_256_128 +#undef VEC_EXTRACT_128 +#undef VEC_EXTRACT_UNIT +#undef VEC_SET1_VAL128 +#undef VEC_MOVE +#undef VEC_CAST_128_256 +#undef VEC_INSERT_VAL +#undef VEC_CVT_128_256 +#undef VEC_SET1_VAL +#undef VEC_POPCVT_CHAR +#undef VEC_LDPOPCVT_CHAR +#undef VEC_CMP_EQ +#undef VEC_SET_LSE +#undef SHIFT_HAP #undef MASK_VEC -#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) -#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef VEC_SSE_TO_AVX +#undef VEC_SHIFT_LEFT_1BIT #undef MASK_ALL_ONES -#undef COMPARE_VECS(__v1, __v2) +#undef COMPARE_VECS #undef _256_INT_TYPE #undef BITMASK_VEC #endif diff --git a/public/VectorPairHMM/src/main/c++/define-float.h b/public/VectorPairHMM/src/main/c++/define-float.h index 318f78280..056bd53f5 100644 --- a/public/VectorPairHMM/src/main/c++/define-float.h +++ b/public/VectorPairHMM/src/main/c++/define-float.h @@ -42,33 +42,33 @@ #undef MASK_TYPE #undef MASK_ALL_ONES -#undef SET_VEC_ZERO(__vec) -#undef VEC_OR(__v1, __v2) -#undef VEC_ADD(__v1, __v2) -#undef VEC_SUB(__v1, __v2) -#undef VEC_MUL(__v1, __v2) -#undef VEC_DIV(__v1, __v2) -#undef VEC_BLEND(__v1, __v2, __mask) -#undef VEC_BLENDV(__v1, __v2, __maskV) -#undef VEC_CAST_256_128(__v1) -#undef VEC_EXTRACT_128(__v1, __im) -#undef VEC_EXTRACT_UNIT(__v1, __im) -#undef VEC_SET1_VAL128(__val) -#undef VEC_MOVE(__v1, __val) -#undef VEC_CAST_128_256(__v1) -#undef VEC_INSERT_VAL(__v1, __val, __pos) -#undef VEC_CVT_128_256(__v1) -#undef VEC_SET1_VAL(__val) -#undef VEC_POPCVT_CHAR(__ch) -#undef VEC_LDPOPCVT_CHAR(__addr) -#undef VEC_CMP_EQ(__v1, __v2) -#undef VEC_SET_LSE(__val) -#undef SHIFT_HAP(__v1, __val) +#undef SET_VEC_ZERO +#undef VEC_OR +#undef VEC_ADD +#undef VEC_SUB +#undef VEC_MUL +#undef VEC_DIV +#undef VEC_BLEND +#undef VEC_BLENDV +#undef VEC_CAST_256_128 +#undef VEC_EXTRACT_128 +#undef VEC_EXTRACT_UNIT +#undef VEC_SET1_VAL128 +#undef VEC_MOVE +#undef VEC_CAST_128_256 +#undef VEC_INSERT_VAL +#undef VEC_CVT_128_256 +#undef VEC_SET1_VAL +#undef VEC_POPCVT_CHAR +#undef VEC_LDPOPCVT_CHAR +#undef VEC_CMP_EQ +#undef VEC_SET_LSE +#undef SHIFT_HAP #undef MASK_VEC -#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) -#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef VEC_SSE_TO_AVX +#undef VEC_SHIFT_LEFT_1BIT #undef MASK_ALL_ONES -#undef COMPARE_VECS(__v1, __v2) +#undef COMPARE_VECS #undef _256_INT_TYPE #undef BITMASK_VEC #endif diff --git a/public/VectorPairHMM/src/main/c++/define-sse-double.h b/public/VectorPairHMM/src/main/c++/define-sse-double.h index 2d271a854..9456e1453 100644 --- a/public/VectorPairHMM/src/main/c++/define-sse-double.h +++ b/public/VectorPairHMM/src/main/c++/define-sse-double.h @@ -40,35 +40,35 @@ #undef MASK_TYPE #undef MASK_ALL_ONES -#undef VEC_EXTRACT_UNIT(__v1, __im) -#undef VEC_INSERT_UNIT(__v1,__ins,__im) -#undef SET_VEC_ZERO(__vec) -#undef VEC_OR(__v1, __v2) -#undef VEC_ADD(__v1, __v2) -#undef VEC_SUB(__v1, __v2) -#undef VEC_MUL(__v1, __v2) -#undef VEC_DIV(__v1, __v2) -#undef VEC_BLEND(__v1, __v2, __mask) -#undef VEC_BLENDV(__v1, __v2, __maskV) -#undef VEC_CAST_256_128(__v1) -#undef VEC_EXTRACT_128(__v1, __im) -#undef VEC_EXTRACT_UNIT(__v1, __im) -#undef VEC_SET1_VAL128(__val) -#undef VEC_MOVE(__v1, __val) -#undef VEC_CAST_128_256(__v1) -#undef VEC_INSERT_VAL(__v1, __val, __pos) -#undef VEC_CVT_128_256(__v1) -#undef VEC_SET1_VAL(__val) -#undef VEC_POPCVT_CHAR(__ch) -#undef VEC_LDPOPCVT_CHAR(__addr) -#undef VEC_CMP_EQ(__v1, __v2) -#undef VEC_SET_LSE(__val) -#undef SHIFT_HAP(__v1, __val) +#undef VEC_EXTRACT_UNIT +#undef VEC_INSERT_UNIT +#undef SET_VEC_ZERO +#undef VEC_OR +#undef VEC_ADD +#undef VEC_SUB +#undef VEC_MUL +#undef VEC_DIV +#undef VEC_BLEND +#undef VEC_BLENDV +#undef VEC_CAST_256_128 +#undef VEC_EXTRACT_128 +#undef VEC_EXTRACT_UNIT +#undef VEC_SET1_VAL128 +#undef VEC_MOVE +#undef VEC_CAST_128_256 +#undef VEC_INSERT_VAL +#undef VEC_CVT_128_256 +#undef VEC_SET1_VAL +#undef VEC_POPCVT_CHAR +#undef VEC_LDPOPCVT_CHAR +#undef VEC_CMP_EQ +#undef VEC_SET_LSE +#undef SHIFT_HAP #undef MASK_VEC -#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) -#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef VEC_SSE_TO_AVX +#undef VEC_SHIFT_LEFT_1BIT #undef MASK_ALL_ONES -#undef COMPARE_VECS(__v1, __v2) +#undef COMPARE_VECS #undef _256_INT_TYPE #undef BITMASK_VEC #endif diff --git a/public/VectorPairHMM/src/main/c++/define-sse-float.h b/public/VectorPairHMM/src/main/c++/define-sse-float.h index 20af947dd..e9371c000 100644 --- a/public/VectorPairHMM/src/main/c++/define-sse-float.h +++ b/public/VectorPairHMM/src/main/c++/define-sse-float.h @@ -40,35 +40,35 @@ #undef MASK_TYPE #undef MASK_ALL_ONES -#undef VEC_EXTRACT_UNIT(__v1, __im) -#undef VEC_INSERT_UNIT(__v1,__ins,__im) -#undef SET_VEC_ZERO(__vec) -#undef VEC_OR(__v1, __v2) -#undef VEC_ADD(__v1, __v2) -#undef VEC_SUB(__v1, __v2) -#undef VEC_MUL(__v1, __v2) -#undef VEC_DIV(__v1, __v2) -#undef VEC_BLEND(__v1, __v2, __mask) -#undef VEC_BLENDV(__v1, __v2, __maskV) -#undef VEC_CAST_256_128(__v1) -#undef VEC_EXTRACT_128(__v1, __im) -#undef VEC_EXTRACT_UNIT(__v1, __im) -#undef VEC_SET1_VAL128(__val) -#undef VEC_MOVE(__v1, __val) -#undef VEC_CAST_128_256(__v1) -#undef VEC_INSERT_VAL(__v1, __val, __pos) -#undef VEC_CVT_128_256(__v1) -#undef VEC_SET1_VAL(__val) -#undef VEC_POPCVT_CHAR(__ch) -#undef VEC_LDPOPCVT_CHAR(__addr) -#undef VEC_CMP_EQ(__v1, __v2) -#undef VEC_SET_LSE(__val) -#undef SHIFT_HAP(__v1, __val) +#undef VEC_EXTRACT_UNIT +#undef VEC_INSERT_UNIT +#undef SET_VEC_ZERO +#undef VEC_OR +#undef VEC_ADD +#undef VEC_SUB +#undef VEC_MUL +#undef VEC_DIV +#undef VEC_BLEND +#undef VEC_BLENDV +#undef VEC_CAST_256_128 +#undef VEC_EXTRACT_128 +#undef VEC_EXTRACT_UNIT +#undef VEC_SET1_VAL128 +#undef VEC_MOVE +#undef VEC_CAST_128_256 +#undef VEC_INSERT_VAL +#undef VEC_CVT_128_256 +#undef VEC_SET1_VAL +#undef VEC_POPCVT_CHAR +#undef VEC_LDPOPCVT_CHAR +#undef VEC_CMP_EQ +#undef VEC_SET_LSE +#undef SHIFT_HAP #undef MASK_VEC -#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) -#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef VEC_SSE_TO_AVX +#undef VEC_SHIFT_LEFT_1BIT #undef MASK_ALL_ONES -#undef COMPARE_VECS(__v1, __v2) +#undef COMPARE_VECS #undef _256_INT_TYPE #undef BITMASK_VEC #endif diff --git a/public/VectorPairHMM/src/main/c++/jni_common.h b/public/VectorPairHMM/src/main/c++/jni_common.h index ee43da2ec..b3139d4ac 100644 --- a/public/VectorPairHMM/src/main/c++/jni_common.h +++ b/public/VectorPairHMM/src/main/c++/jni_common.h @@ -37,7 +37,7 @@ /*#define DUMP_TO_SANDBOX 1*/ -#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1 +/*#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1*/ #ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY //Gets direct access to Java arrays diff --git a/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc index 8a3f8b5bc..641e2016d 100644 --- a/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc +++ b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc @@ -26,7 +26,6 @@ #include "headers.h" #include "jni_common.h" #include "org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.h" -#include "template.h" #include "utils.h" #include "LoadTimeInitializer.h" #include "jnidebug.h" diff --git a/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc index 220b1aa60..9c00c45af 100644 --- a/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc +++ b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc @@ -26,7 +26,7 @@ #include "headers.h" #include "jni_common.h" #include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h" -#include "template.h" +//#include "template.h" #include "utils.h" #include "LoadTimeInitializer.h" diff --git a/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc b/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc index d2cc7d903..20e9910a6 100644 --- a/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc +++ b/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc @@ -29,7 +29,6 @@ using namespace std; int main(int argc, char** argv) { -#define BATCH_SIZE 10000 if(argc < 2) { cerr << "Needs path to input file as argument\n"; diff --git a/public/VectorPairHMM/src/main/c++/pairhmm-template-main.cc b/public/VectorPairHMM/src/main/c++/pairhmm-template-main.cc index 00946d086..4f8304689 100644 --- a/public/VectorPairHMM/src/main/c++/pairhmm-template-main.cc +++ b/public/VectorPairHMM/src/main/c++/pairhmm-template-main.cc @@ -24,12 +24,11 @@ #include "headers.h" -#include "template.h" -#include "vector_defs.h" #define SIMD_ENGINE avx #define SIMD_ENGINE_AVX +#include "utils.h" #define BATCH_SIZE 10000 #define RUN_HYBRID diff --git a/public/VectorPairHMM/src/main/c++/sse_function_instantiations.cc b/public/VectorPairHMM/src/main/c++/sse_function_instantiations.cc index 550272494..a8b88f6c7 100644 --- a/public/VectorPairHMM/src/main/c++/sse_function_instantiations.cc +++ b/public/VectorPairHMM/src/main/c++/sse_function_instantiations.cc @@ -23,7 +23,6 @@ */ -#include "template.h" #undef SIMD_ENGINE #undef SIMD_ENGINE_AVX @@ -31,6 +30,8 @@ #define SIMD_ENGINE sse #define SIMD_ENGINE_SSE +#include "template.h" + #include "define-sse-float.h" #include "shift_template.c" #include "pairhmm-template-kernel.cc" diff --git a/public/VectorPairHMM/src/main/c++/template.h b/public/VectorPairHMM/src/main/c++/template.h index ce4dbfc86..550e76396 100644 --- a/public/VectorPairHMM/src/main/c++/template.h +++ b/public/VectorPairHMM/src/main/c++/template.h @@ -28,27 +28,17 @@ #include "headers.h" -#define MM 0 -#define GapM 1 -#define MX 2 -#define XX 3 -#define MY 4 -#define YY 5 - -//#define MROWS 500 -//#define MCOLS 1000 - -#define CAT(X,Y) X####Y -#define CONCAT(X,Y) CAT(X,Y) #define ALIGNED __attribute__((aligned(32))) +#ifdef SIMD_ENGINE_AVX typedef union __attribute__((aligned(32))) { ALIGNED __m256 ALIGNED d; ALIGNED __m128i ALIGNED s[2]; ALIGNED float ALIGNED f[8]; ALIGNED __m256i ALIGNED i; } ALIGNED mix_F ALIGNED; +#endif typedef union __attribute__((aligned(32))) { ALIGNED __m128 ALIGNED d; @@ -81,12 +71,14 @@ typedef union ALIGNED ALIGNED float ALIGNED f; } ALIGNED IF_32 ALIGNED; +#ifdef SIMD_ENGINE_AVX typedef union __attribute__((aligned(32))) { ALIGNED __m256d ALIGNED d; ALIGNED __m128i ALIGNED s[2]; ALIGNED double ALIGNED f[4]; ALIGNED __m256i ALIGNED i; } ALIGNED mix_D ALIGNED; +#endif typedef union __attribute__((aligned(32))) { ALIGNED __m128d ALIGNED d; @@ -120,200 +112,7 @@ typedef union ALIGNED } ALIGNED IF_64 ALIGNED; -#define MAX_QUAL 254 -#define MAX_JACOBIAN_TOLERANCE 8.0 -#define JACOBIAN_LOG_TABLE_STEP 0.0001 -#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP) -#define MAXN 70000 -#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients -#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1) - -template -struct ContextBase -{ - public: - NUMBER ph2pr[128]; - NUMBER INITIAL_CONSTANT; - NUMBER LOG10_INITIAL_CONSTANT; - NUMBER RESULT_THRESHOLD; - - static bool staticMembersInitializedFlag; - static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; - static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; - - static void initializeStaticMembers() - { - if(!staticMembersInitializedFlag) - { - //Order of calls important - Jacobian first, then MatchToMatch - initializeJacobianLogTable(); - initializeMatchToMatchProb(); - staticMembersInitializedFlag = true; - } - } - - static void deleteStaticMembers() - { - if(staticMembersInitializedFlag) - { - staticMembersInitializedFlag = false; - } - } - - //Called only once during library load - don't bother to optimize with single precision fp - static void initializeJacobianLogTable() - { - for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { - jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP))); - } - } - - //Called only once per library load - don't bother optimizing with single fp - static void initializeMatchToMatchProb() - { - double LN10 = log(10); - double INV_LN10 = 1.0/LN10; - for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i) - for (int j = 0; j <= i; j++) { - double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j); - double matchToMatchLog10 = - log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10; - matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10)); - } - } - //Called during computation - use single precision where possible - static int fastRound(NUMBER d) { - return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5)); - } - //Called during computation - use single precision where possible - static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) { - // make sure small is really the smaller value - if (small > big) { - NUMBER t = big; - big = small; - small = t; - } - - if (isinf(small) == -1 || isinf(big) == -1) - return big; - - NUMBER diff = big - small; - if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE)) - return big; - - // OK, so |y-x| < tol: we use the following identity then: - // we need to compute log10(10^x + 10^y) - // By Jacobian logarithm identity, this is equal to - // max(x,y) + log10(1+10^-abs(x-y)) - // we compute the second term as a table lookup with integer quantization - // we have pre-stored correction for 0,0.1,0.2,... 10.0 - int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding - return big + jacobianLogTable[ind]; - } -}; - -template -struct Context : public ContextBase -{}; - -template<> -struct Context : public ContextBase -{ - Context():ContextBase() - { - for (int x = 0; x < 128; x++) - ph2pr[x] = pow(10.0, -((double)x) / 10.0); - - INITIAL_CONSTANT = ldexp(1.0, 1020.0); - LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT); - RESULT_THRESHOLD = 0.0; - } - - double LOG10(double v){ return log10(v); } - inline double POW(double b, double e) { return pow(b,e); } - - static double _(double n){ return n; } - static double _(float n){ return ((double) n); } -}; - -template<> -struct Context : public ContextBase -{ - Context() : ContextBase() - { - for (int x = 0; x < 128; x++) - { - ph2pr[x] = powf(10.f, -((float)x) / 10.f); - } - - INITIAL_CONSTANT = ldexpf(1.f, 120.f); - LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT); - RESULT_THRESHOLD = ldexpf(1.f, -110.f); - } - - float LOG10(float v){ return log10f(v); } - inline float POW(float b, float e) { return powf(b,e); } - - static float _(double n){ return ((float) n); } - static float _(float n){ return n; } -}; - -#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \ -{ \ - int minQual = delQual; \ - int maxQual = insQual; \ - if (insQual <= delQual) \ - { \ - minQual = insQual; \ - maxQual = delQual; \ - } \ - (output) = (MAX_QUAL < maxQual) ? \ - ((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \ - : ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \ -} - -typedef struct -{ - int rslen, haplen; - /*int *q, *i, *d, *c;*/ - /*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/ - char *q, *i, *d, *c; - char *hap, *rs; - int *ihap; - int *irs; -} testcase; - -int normalize(char c); -int read_testcase(testcase *tc, FILE* ifp=0); - - -#define MIN_ACCEPTED 1e-28f -#define NUM_DISTINCT_CHARS 5 -#define AMBIG_CHAR 4 - -class ConvertChar { - - static uint8_t conversionTable[255] ; - -public: - - static void init() { - assert (NUM_DISTINCT_CHARS == 5) ; - assert (AMBIG_CHAR == 4) ; - - conversionTable['A'] = 0 ; - conversionTable['C'] = 1 ; - conversionTable['T'] = 2 ; - conversionTable['G'] = 3 ; - conversionTable['N'] = 4 ; - } - - static inline uint8_t get(uint8_t input) { - return conversionTable[input] ; - } - -}; - +#include "common_data_structure.h" #endif diff --git a/public/VectorPairHMM/src/main/c++/utils.cc b/public/VectorPairHMM/src/main/c++/utils.cc index 6c623e9e5..3b0ce35ee 100644 --- a/public/VectorPairHMM/src/main/c++/utils.cc +++ b/public/VectorPairHMM/src/main/c++/utils.cc @@ -22,11 +22,8 @@ *THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include "headers.h" -#include "template.h" #include "utils.h" -#include "vector_defs.h" #include "LoadTimeInitializer.h" using namespace std; @@ -36,51 +33,107 @@ uint8_t ConvertChar::conversionTable[255]; float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0; double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0; //Static members in ContextBase +template<> bool ContextBase::staticMembersInitializedFlag = false; -double ContextBase::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; -double ContextBase::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; +template<> +double ContextBase::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE] = { }; +template<> +double ContextBase::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1] = { }; +template<> bool ContextBase::staticMembersInitializedFlag = false; -float ContextBase::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; -float ContextBase::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; +template<> +float ContextBase::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE] = { }; +template<> +float ContextBase::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1] = { }; +bool search_file_for_string(string filename, string search_string) +{ + ifstream fptr; + fptr.open(filename.c_str(),ios::in); + if(fptr.is_open()) + { + string buffer; + buffer.clear(); + buffer.resize(4096); + bool retvalue = false; + while(!fptr.eof()) + { + fptr.getline(&(buffer[0]), 4096); + if(buffer.find(search_string) != string::npos) //found string + { + retvalue = true; + break; + } + } + buffer.clear(); + fptr.close(); + return retvalue; + } + else + return false; +} + +bool is_cpuid_ecx_bit_set(int eax, int bitidx) +{ + int ecx = 0, edx = 0, ebx = 0; + __asm__ ("cpuid" + :"=b" (ebx), + "=c" (ecx), + "=d" (edx) + :"a" (eax) + ); + return (((ecx >> bitidx)&1) == 1); +} + bool is_avx_supported() { - return (_may_i_use_cpu_feature(_FEATURE_AVX) > 0); - //int ecx = 0, edx = 0, ebx = 0; - //__asm__("cpuid" - //: "=b" (ebx), - //"=c" (ecx), - //"=d" (edx) - //: "a" (1) - //); - //return ((ecx >> 28)&1) == 1; +#ifdef __INTEL_COMPILER + bool use_avx = _may_i_use_cpu_feature(_FEATURE_AVX); + if(use_avx) + return true; + else + { + //check if core supports AVX, but kernel does not and print info message + if(!is_cpuid_ecx_bit_set(1, 28)) //core does not support AVX + return false; + //else fall through to end of function + } +#else + if(!__builtin_cpu_supports("avx")) //core does not support AVX + return false; + else + { + //core supports AVX, check if kernel supports + if(search_file_for_string("/proc/cpuinfo","avx")) + return true; + //else fall through to end of function + + } +#endif //__INTEL_COMPILER + clog << "INFO: Your CPU supports AVX vector instructions, but your kernel does not. Try upgrading to a kernel that supports AVX.\n"; + clog << "INFO: Your program will run correctly, but slower than the AVX version\n"; + return false; } bool is_sse41_supported() { +#ifdef __INTEL_COMPILER return (_may_i_use_cpu_feature(_FEATURE_SSE4_1) > 0); - //int ecx = 0, edx = 0, ebx = 0; - //__asm__("cpuid" - //: "=b" (ebx), - //"=c" (ecx), - //"=d" (edx) - //: "a" (1) - //); - //return ((ecx >> 19)&1) == 1; +#else + return __builtin_cpu_supports("sse4.1"); +#endif + //return is_cpuid_ecx_bit_set(1, 19); } bool is_sse42_supported() { +#ifdef __INTEL_COMPILER return (_may_i_use_cpu_feature(_FEATURE_SSE4_2) > 0); - //int ecx = 0, edx = 0, ebx = 0; - //__asm__("cpuid" - //: "=b" (ebx), - //"=c" (ecx), - //"=d" (edx) - //: "a" (1) - //); - //return ((ecx >> 20)&1) == 1; +#else + return __builtin_cpu_supports("sse4.2"); +#endif + //return is_cpuid_ecx_bit_set(1, 20); } uint64_t get_machine_capabilities() @@ -154,10 +207,14 @@ int read_testcase(testcase *tc, FILE* ifp) tc->haplen = strlen(tc->hap); tc->rslen = strlen(tc->rs); - assert(strlen(q) == tc->rslen); - assert(strlen(i) == tc->rslen); - assert(strlen(d) == tc->rslen); - assert(strlen(c) == tc->rslen); + assert(strlen(q) == (size_t)tc->rslen); + assert(strlen(i) == (size_t)tc->rslen); + assert(strlen(d) == (size_t)tc->rslen); + assert(strlen(c) == (size_t)tc->rslen); + + g_load_time_initializer.update_stat(READ_LENGTH_IDX, tc->rslen); + g_load_time_initializer.update_stat(HAPLOTYPE_LENGTH_IDX, tc->haplen); + g_load_time_initializer.update_stat(PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, tc->haplen*tc->rslen); //assert(tc->rslen < MROWS); //tc->ihap = (int *) malloc(tc->haplen*sizeof(int)); //tc->irs = (int *) malloc(tc->rslen*sizeof(int)); @@ -279,15 +336,15 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat) tc->c = new char[tc->rslen]; //cout << "Lengths "<haplen <<" "<rslen<<"\n"; memcpy(tc->rs, tokens[1].c_str(),tokens[1].size()); - assert(tokens.size() == 2 + 4*(tc->rslen)); + assert(tokens.size() == (size_t)(2 + 4*(tc->rslen))); //assert(tc->rslen < MROWS); - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]); - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]); - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]); - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]); if(reformat) @@ -297,16 +354,16 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat) assert(ofptr.is_open()); ofptr << tokens[0] << " "; ofptr << tokens[1] << " "; - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) ofptr << ((char)(tc->q[j]+33)); ofptr << " "; - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) ofptr << ((char)(tc->i[j]+33)); ofptr << " "; - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) ofptr << ((char)(tc->d[j]+33)); ofptr << " "; - for(unsigned j=0;jrslen;++j) + for(int j=0;jrslen;++j) ofptr << ((char)(tc->c[j]+33)); ofptr << " 0 false\n"; @@ -363,6 +420,10 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, ifptr.open(filename); assert(ifptr.is_open()); } +#ifdef PRINT_PER_INTERVAL_TIMINGS + ofstream times_fptr; + times_fptr.open("native_timed_intervals.csv",ios::out); +#endif vector tc_vector; tc_vector.clear(); testcase tc; @@ -377,11 +438,11 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode PAPI_num_counters(); int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; - char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "itlb_walk_cycles", "dtlb_load_walk_cycles", "dtlb_store_walk_cycles" }; + char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "l1_pending_miss", "lfb_hit", "l2_hit" }; assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK); - assert(PAPI_event_name_to_code("ITLB_MISSES:WALK_DURATION", &(events[1])) == PAPI_OK); - assert(PAPI_event_name_to_code("DTLB_LOAD_MISSES:WALK_DURATION", &(events[2])) == PAPI_OK); - assert(PAPI_event_name_to_code("DTLB_STORE_MISSES:WALK_DURATION", &(events[3])) == PAPI_OK); + assert(PAPI_event_name_to_code("L1D_PEND_MISS:OCCURRENCES", &(events[1])) == PAPI_OK); + assert(PAPI_event_name_to_code("MEM_LOAD_UOPS_RETIRED:HIT_LFB", &(events[2])) == PAPI_OK); + assert(PAPI_event_name_to_code("MEM_LOAD_UOPS_RETIRED:L2_HIT", &(events[3])) == PAPI_OK); long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; #endif @@ -398,6 +459,9 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, baseline_results_vec.clear(); results_vec.resize(tc_vector.size()); baseline_results_vec.resize(tc_vector.size()); + g_load_time_initializer.update_stat(NUM_TESTCASES_IDX, tc_vector.size()); + g_load_time_initializer.update_stat(NUM_READS_IDX, tc_vector.size()); + g_load_time_initializer.update_stat(NUM_HAPLOTYPES_IDX, tc_vector.size()); struct timespec start_time; #ifdef USE_PAPI assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK); @@ -429,7 +493,11 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, #ifdef USE_PAPI assert(PAPI_stop_counters(values, NUM_PAPI_COUNTERS) == PAPI_OK); #endif - vector_compute_time += diff_time(start_time); + uint64_t curr_interval = diff_time(start_time); +#ifdef PRINT_PER_INTERVAL_TIMINGS + times_fptr << curr_interval << "\n"; +#endif + vector_compute_time += curr_interval; #ifdef USE_PAPI for(unsigned k=0;k std::string to_string(T obj) @@ -50,6 +50,15 @@ extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_lo void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline); template NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); +template +NUMBER compute_full_prob_avxd(testcase *tc, NUMBER *before_last_log=0); +template +NUMBER compute_full_prob_avxs(testcase *tc, NUMBER *before_last_log=0); +template +NUMBER compute_full_prob_ssed(testcase *tc, NUMBER *before_last_log=0); +template +NUMBER compute_full_prob_sses(testcase *tc, NUMBER *before_last_log=0); + double getCurrClk(); void get_time(struct timespec* x); uint64_t diff_time(struct timespec& prev_time); @@ -71,5 +80,6 @@ void do_compute(char* filename, bool use_old_read_testcase=true, unsigned chunk_ /*#define DUMP_COMPUTE_VALUES 1*/ #define BATCH_SIZE 10000 #define RUN_HYBRID +/*#define PRINT_PER_INTERVAL_TIMINGS 1*/ #endif diff --git a/public/VectorPairHMM/src/main/c++/vector_defs.h b/public/VectorPairHMM/src/main/c++/vector_defs.h deleted file mode 100644 index 2aca9565f..000000000 --- a/public/VectorPairHMM/src/main/c++/vector_defs.h +++ /dev/null @@ -1,55 +0,0 @@ -/*Copyright (c) 2012 The Broad Institute - -*Permission is hereby granted, free of charge, to any person -*obtaining a copy of this software and associated documentation -*files (the "Software"), to deal in the Software without -*restriction, including without limitation the rights to use, -*copy, modify, merge, publish, distribute, sublicense, and/or sell -*copies of the Software, and to permit persons to whom the -*Software is furnished to do so, subject to the following -*conditions: - -*The above copyright notice and this permission notice shall be -*included in all copies or substantial portions of the Software. - -*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -*THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -#undef SIMD_ENGINE -#undef SIMD_ENGINE_AVX -#undef SIMD_ENGINE_SSE - -#define SIMD_ENGINE avx -#define SIMD_ENGINE_AVX - -#include "define-float.h" -#include "vector_function_prototypes.h" - -#include "define-double.h" -#include "vector_function_prototypes.h" - -#undef SIMD_ENGINE -#undef SIMD_ENGINE_AVX - -#define SIMD_ENGINE sse -#define SIMD_ENGINE_SSE - - -#include "define-sse-float.h" -#include "vector_function_prototypes.h" - -#include "define-sse-double.h" -#include "vector_function_prototypes.h" - -#undef SIMD_ENGINE -#undef SIMD_ENGINE_AVX -#undef SIMD_ENGINE_SSE - diff --git a/public/VectorPairHMM/src/main/c++/vector_function_prototypes.h b/public/VectorPairHMM/src/main/c++/vector_function_prototypes.h deleted file mode 100644 index c0fddc394..000000000 --- a/public/VectorPairHMM/src/main/c++/vector_function_prototypes.h +++ /dev/null @@ -1,44 +0,0 @@ -/*Copyright (c) 2012 The Broad Institute - -*Permission is hereby granted, free of charge, to any person -*obtaining a copy of this software and associated documentation -*files (the "Software"), to deal in the Software without -*restriction, including without limitation the rights to use, -*copy, modify, merge, publish, distribute, sublicense, and/or sell -*copies of the Software, and to permit persons to whom the -*Software is furnished to do so, subject to the following -*conditions: - -*The above copyright notice and this permission notice shall be -*included in all copies or substantial portions of the Software. - -*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -*THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut); -inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn); -inline void CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]); -inline void CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess); -inline void CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt); -inline void CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, SIMD_TYPE& distm, SIMD_TYPE& _1_distm, SIMD_TYPE& distmChosen); -template inline void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, SIMD_TYPE *p_MM, SIMD_TYPE *p_GAPM, SIMD_TYPE *p_MX, SIMD_TYPE *p_XX, SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, SIMD_TYPE *distm1D); -template inline void CONCAT(CONCAT(stripINITIALIZATION,SIMD_ENGINE), PRECISION)( - int stripIdx, Context ctx, testcase *tc, SIMD_TYPE &pGAPM, SIMD_TYPE &pMM, SIMD_TYPE &pMX, SIMD_TYPE &pXX, SIMD_TYPE &pMY, SIMD_TYPE &pYY, - SIMD_TYPE &rs, UNION_TYPE &rsN, SIMD_TYPE &distm, SIMD_TYPE &_1_distm, SIMD_TYPE *distm1D, SIMD_TYPE N_packed256, SIMD_TYPE *p_MM , SIMD_TYPE *p_GAPM , - SIMD_TYPE *p_MX, SIMD_TYPE *p_XX , SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1, - UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM); -inline SIMD_TYPE CONCAT(CONCAT(computeDISTM,SIMD_ENGINE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, SIMD_TYPE rs, UNION_TYPE rsN, SIMD_TYPE N_packed256, - SIMD_TYPE distm, SIMD_TYPE _1_distm); -inline void CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, - UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1, - SIMD_TYPE pMM, SIMD_TYPE pGAPM, SIMD_TYPE pMX, SIMD_TYPE pXX, SIMD_TYPE pMY, SIMD_TYPE pYY, SIMD_TYPE distmSel); -template NUMBER CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL); - diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 4203677a1..e45cf0941 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -78,8 +78,9 @@ public abstract class PairHMM { protected double[] mLikelihoodArray; //profiling information - protected static final boolean doProfiling = true; - protected long computeTime = 0; + protected static Boolean doProfiling = true; + protected static long pairHMMComputeTime = 0; + protected long threadLocalPairHMMComputeTimeDiff = 0; protected long startTime = 0; /** @@ -207,7 +208,13 @@ public abstract class PairHMM { } } if(doProfiling) - computeTime += (System.nanoTime() - startTime); + { + threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime); + //synchronized(doProfiling) + { + pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff; + } + } return likelihoodMap; } @@ -314,6 +321,17 @@ public abstract class PairHMM { return Math.min(haplotype1.length, haplotype2.length); } + /** + * Use number of threads to set doProfiling flag - doProfiling iff numThreads == 1 + * This function should be called only during initialization phase - single thread phase of HC + */ + public static void setNumberOfThreads(final int numThreads) + { + doProfiling = (numThreads == 1); + if(numThreads > 1) + logger.info("Performance profiling for PairHMM is disabled because HaplotypeCaller is being run with multiple threads (-nct>1) option\nProfiling is enabled only when running in single thread mode\n"); + } + /** * Return the results of the computeLikelihoods function */ @@ -324,6 +342,6 @@ public abstract class PairHMM { public void close() { if(doProfiling) - System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(computeTime*1e-9)); + System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(pairHMMComputeTime*1e-9)); } } diff --git a/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so b/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so index 7cd8b1f73..ca0f6eef7 100644 Binary files a/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so and b/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so differ