From 20a46e4098469c47fd124a5610f49e3f928759fc Mon Sep 17 00:00:00 2001 From: Karthik Gururaj Date: Fri, 7 Feb 2014 15:19:55 -0800 Subject: [PATCH] Check only for SSE 4.1 (rather than SSE 4.2) when trying to use the SSE implementation of PairHMM --- .../utils/pairhmm/VectorLoglessPairHMM.java | 5 +++-- public/c++/VectorPairHMM/utils.cc | 21 +++++++++++++++++++-- public/c++/VectorPairHMM/utils.h | 5 +++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java index aebe3cf95..29d3a729d 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java @@ -77,8 +77,9 @@ import java.io.OutputStream; public class VectorLoglessPairHMM extends JNILoglessPairHMM { //For machine capabilities - public static final long sse42Mask = 1; - public static final long avxMask = 2; + public static final long sse41Mask = 1; + public static final long sse42Mask = 2; + public static final long avxMask = 4; public static final long enableAll = 0xFFFFFFFFFFFFFFFFl; //Used to copy references to byteArrays to JNI from reads diff --git a/public/c++/VectorPairHMM/utils.cc b/public/c++/VectorPairHMM/utils.cc index 9974c5ace..8fe20234e 100644 --- a/public/c++/VectorPairHMM/utils.cc +++ b/public/c++/VectorPairHMM/utils.cc @@ -22,6 +22,21 @@ bool is_avx_supported() return ((ecx >> 28)&1) == 1; } +//Reports whether CPUID leaf 1 sets ECX bit 19 (SSE4.1) +bool is_sse41_supported() +{ + int eax = 0, ecx = 0, edx = 0, ebx = 0; + //cpuid overwrites eax, so eax is declared as an output as well as an input + __asm__("cpuid" + : "=a" (eax), + "=b" (ebx), + "=c" (ecx), + "=d" (edx) + : "a" (1) + ); + return ((ecx >> 19)&1) == 1; +} + bool is_sse42_supported() { int ecx = 0, edx = 0, ebx = 0; @@ -41,6 +56,8 @@ uint64_t get_machine_capabilities() machine_mask |= (1 << AVX_CUSTOM_IDX); if(is_sse42_supported()) machine_mask |= (1 << SSE42_CUSTOM_IDX); + if(is_sse41_supported())
+ machine_mask |= (1 << SSE41_CUSTOM_IDX); return machine_mask; } @@ -54,9 +71,9 @@ void initialize_function_pointers(uint64_t mask) g_compute_full_prob_double = compute_full_prob_avxd; } else - if(is_sse42_supported() && (mask & (1<< SSE42_CUSTOM_IDX))) + if(is_sse41_supported() && (mask & (1<< SSE41_CUSTOM_IDX))) { - cout << "Using SSE4.2 accelerated implementation of PairHMM\n"; + cout << "Using SSE4.1 accelerated implementation of PairHMM\n"; g_compute_full_prob_float = compute_full_prob_sses; g_compute_full_prob_double = compute_full_prob_ssed; } diff --git a/public/c++/VectorPairHMM/utils.h b/public/c++/VectorPairHMM/utils.h index 092132473..501b48fbc 100644 --- a/public/c++/VectorPairHMM/utils.h +++ b/public/c++/VectorPairHMM/utils.h @@ -32,7 +32,8 @@ uint64_t diff_time(struct timespec& prev_time); -//bit 0 is sse4.2, bit 1 is AVX +//bit 0 is sse4.1, bit 1 is sse4.2, bit 2 is AVX enum ProcessorCapabilitiesEnum { - SSE42_CUSTOM_IDX=0, + SSE41_CUSTOM_IDX=0, + SSE42_CUSTOM_IDX, AVX_CUSTOM_IDX }; #define ENABLE_ALL_HARDWARE_FEATURES 0xFFFFFFFFFFFFFFFFull