gatk-3.8/public/VectorPairHMM/src/main/c++/template.h

/*Copyright (c) 2012 The Broad Institute

*Permission is hereby granted, free of charge, to any person
*obtaining a copy of this software and associated documentation
*files (the "Software"), to deal in the Software without
*restriction, including without limitation the rights to use,
*copy, modify, merge, publish, distribute, sublicense, and/or sell
*copies of the Software, and to permit persons to whom the
*Software is furnished to do so, subject to the following
*conditions:

*The above copyright notice and this permission notice shall be
*included in all copies or substantial portions of the Software.

*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/


#ifndef TEMPLATES_H_
#define TEMPLATES_H_

#include "headers.h"


/*
 * ALIGNED: shorthand for the GCC-style attribute requesting 32-byte
 * alignment.  It is applied to the union types declared in this header;
 * 32 bytes is the size of the 256-bit __m256 / __m256d / __m256i members
 * that the widest of those unions contain.
 */
#define ALIGNED __attribute__((aligned(32)))

#ifdef SIMD_ENGINE_AVX
/*
 * mix_F: 32-byte-aligned overlay of a 256-bit single-precision vector.
 * All members start at offset 0 and alias the same 32 bytes, so the value
 * can be read as a float vector, as two 128-bit integer halves, as eight
 * individual floats, or as a 256-bit integer vector.
 * Only declared when SIMD_ENGINE_AVX is defined.
 *
 * The alignment is stated once on the union; every member of a union lives
 * at offset 0, so each one inherits the 32-byte alignment.
 */
typedef union ALIGNED {
        __m256  d;      /* whole register as packed floats */
        __m128i s[2];   /* lower / upper 128-bit integer halves */
        float   f[8];   /* element-wise access */
        __m256i i;      /* whole register as a 256-bit integer vector */
} mix_F;
#endif

typedef union __attribute__((aligned(32))) {
        ALIGNED __m128 ALIGNED d;
        ALIGNED __m64 ALIGNED s[2];
        ALIGNED float  ALIGNED f[4];
        ALIGNED __m128i ALIGNED i;
} ALIGNED mix_F128 ALIGNED;

/*
 * MaskVec_F: 32-byte-aligned overlay of a 128-bit value that can be
 * addressed as an integer vector, as a float vector, or as four separate
 * 32-bit mask words.
 */
typedef union ALIGNED {
        __m128i  vec;       /* integer-vector view */
        __m128   vecf;      /* float-vector view of the same bits */
        uint32_t masks[4];  /* the same bits as four 32-bit words */
} MaskVec_F;

/*
 * MaskVec_F128: 32-byte-aligned overlay of a 64-bit value addressable as
 * an __m64 or as two 32-bit mask words.
 *
 * NOTE(review): vec and vecf have the same type here; presumably both
 * names are kept so code written against MaskVec_F's field names also
 * works with this type -- confirm against the users of this union.
 */
typedef union ALIGNED {
        __m64    vec;       /* 64-bit vector view */
        __m64    vecf;      /* second name for the same 64-bit view */
        uint32_t masks[2];  /* the same bits as two 32-bit words */
} MaskVec_F128;

typedef union ALIGNED
{
        ALIGNED __m128i ALIGNED i;
        ALIGNED __m128 ALIGNED f;
} ALIGNED IF_128f ALIGNED;

/*
 * IF_32: 32-byte-aligned union that exposes one scalar either as an int
 * (i) or as a float (f).
 *
 * Because of the 32-byte alignment, sizeof(IF_32) is 32 rather than the
 * size of its members.  The alignment is stated once on the union; both
 * members sit at offset 0 and therefore inherit it.
 */
typedef union ALIGNED {
        int   i;    /* integer view */
        float f;    /* float view of the same storage */
} IF_32;

#ifdef SIMD_ENGINE_AVX
/*
 * mix_D: 32-byte-aligned overlay of a 256-bit double-precision vector.
 * All members start at offset 0 and alias the same 32 bytes, so the value
 * can be read as a double vector, as two 128-bit integer halves, as four
 * individual doubles, or as a 256-bit integer vector.
 * Only declared when SIMD_ENGINE_AVX is defined.
 *
 * The alignment is stated once on the union; every member of a union lives
 * at offset 0, so each one inherits the 32-byte alignment.
 */
typedef union ALIGNED {
        __m256d d;      /* whole register as packed doubles */
        __m128i s[2];   /* lower / upper 128-bit integer halves */
        double  f[4];   /* element-wise access */
        __m256i i;      /* whole register as a 256-bit integer vector */
} mix_D;
#endif

typedef union __attribute__((aligned(32))) {
        ALIGNED __m128d ALIGNED d;
        ALIGNED __m64 ALIGNED s[2];
        ALIGNED double  ALIGNED f[2];
        ALIGNED __m128i ALIGNED i;
} ALIGNED mix_D128 ALIGNED;

/*
 * MaskVec_D: 32-byte-aligned overlay of a 128-bit value that can be
 * addressed as an integer vector, as a double vector, or as two separate
 * 64-bit mask words.
 */
typedef union ALIGNED {
        __m128i  vec;       /* integer-vector view */
        __m128d  vecf;      /* double-vector view of the same bits */
        uint64_t masks[2];  /* the same bits as two 64-bit words */
} MaskVec_D;

/*
 * MaskVec_D128: 32-byte-aligned overlay of a 64-bit value addressable as
 * an __m64 or as a single 64-bit mask word.
 *
 * NOTE(review): vec and vecf have the same type here; presumably both
 * names are kept so code written against MaskVec_D's field names also
 * works with this type -- confirm against the users of this union.
 */
typedef union ALIGNED {
        __m64    vec;       /* 64-bit vector view */
        __m64    vecf;      /* second name for the same 64-bit view */
        uint64_t masks[1];  /* the same bits as one 64-bit word */
} MaskVec_D128;

typedef union ALIGNED
{
        ALIGNED __m128i ALIGNED i;
        ALIGNED __m128d ALIGNED f;
} ALIGNED IF_128d ALIGNED;

/*
 * IF_64: 32-byte-aligned union that exposes one 64-bit scalar either as an
 * int64_t (i) or as a double (f).
 *
 * Because of the 32-byte alignment, sizeof(IF_64) is 32 rather than the
 * size of its members.  The alignment is stated once on the union; both
 * members sit at offset 0 and therefore inherit it.
 */
typedef union ALIGNED {
        int64_t i;  /* integer view */
        double  f;  /* double view of the same storage */
} IF_64;


#include "common_data_structure.h"

#endif
Added public license text to all C++ files 2014-03-04 01:04:00 +08:00			`/*Copyright (c) 2012 The Broad Institute`

			`*Permission is hereby granted, free of charge, to any person`
			`*obtaining a copy of this software and associated documentation`
			`*files (the "Software"), to deal in the Software without`
			`*restriction, including without limitation the rights to use,`
			`*copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`*copies of the Software, and to permit persons to whom the`
			`*Software is furnished to do so, subject to the following`
			`*conditions:`

			`*The above copyright notice and this permission notice shall be`
			`*included in all copies or substantial portions of the Software.`

			`*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,`
			`*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES`
			`*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND`
			`*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT`
			`*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,`
			`*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING`
			`*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR`
			`*THE USE OR OTHER DEALINGS IN THE SOFTWARE.`
			`*/`


First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`#ifndef TEMPLATES_H_`
			`#define TEMPLATES_H_`

Added support for dynamic selection between AVX and un-vectorized C++, still to include SSE code from Mohammad. Debug flags turned on in this commit. 2014-01-19 03:07:23 +08:00			`#include "headers.h"`
1. Nested loops over reads and haplotypes moved to C++ through JNI 2. OpenMP support added 3. Using direct access to Java primitive arrays 4. Debug messages disabled 2014-01-17 11:53:50 +08:00
First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00
			`#define ALIGNED __attribute__((aligned(32)))`

Parallel version of the JNI for the PairHMM The JNI treats shared memory as critical memory and doesn't allow any parallel reads or writes to it until the native code finishes. This is not a problem per se it is the right thing to do, but we need to enable -nct when running the haplotype caller and with it have multiple native PairHMM running for each map call. Move to a copy based memory sharing where the JNI simply copies the memory over to C++ and then has no blocked critical memory when running, allowing -nct to work. This version is slightly (almost unnoticeably) slower with -nct 1, but scales better with -nct 2-4 (we haven't tested anything beyond that because we know the GATK falls apart with higher levels of parallelism * Make VECTOR_LOGLESS_CACHING the default implementation for PairHMM. * Changed version number in pom.xml under public/VectorPairHMM * VectorPairHMM can now be compiled using gcc 4.8.x * Modified define-* to get rid of gcc warnings for extra tokens after #undefs * Added a Linux kernel version check for AVX - gcc's __builtin_cpu_supports function does not check whether the kernel supports AVX or not. * Updated PairHMM profiling code to update and print numbers only in single-thread mode * Edited README.md, pom.xml and Makefile for users to pass path to gcc 4.8.x if necessary * Moved all cpuid inline assembly to single function Changed info message to clog from cinfo * Modified version in pom.xml in VectorPairHMM from 3.1 to 3.2 * Deleted some unnecessary code * Modified C++ sandbox to print per interval timing 2014-03-18 02:42:19 +08:00			`#ifdef SIMD_ENGINE_AVX`
First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`typedef union __attribute__((aligned(32))) {`
			`ALIGNED __m256 ALIGNED d;`
			`ALIGNED __m128i ALIGNED s[2];`
			`ALIGNED float ALIGNED f[8];`
			`ALIGNED __m256i ALIGNED i;`
			`} ALIGNED mix_F ALIGNED;`
Parallel version of the JNI for the PairHMM The JNI treats shared memory as critical memory and doesn't allow any parallel reads or writes to it until the native code finishes. This is not a problem per se it is the right thing to do, but we need to enable -nct when running the haplotype caller and with it have multiple native PairHMM running for each map call. Move to a copy based memory sharing where the JNI simply copies the memory over to C++ and then has no blocked critical memory when running, allowing -nct to work. This version is slightly (almost unnoticeably) slower with -nct 1, but scales better with -nct 2-4 (we haven't tested anything beyond that because we know the GATK falls apart with higher levels of parallelism * Make VECTOR_LOGLESS_CACHING the default implementation for PairHMM. * Changed version number in pom.xml under public/VectorPairHMM * VectorPairHMM can now be compiled using gcc 4.8.x * Modified define-* to get rid of gcc warnings for extra tokens after #undefs * Added a Linux kernel version check for AVX - gcc's __builtin_cpu_supports function does not check whether the kernel supports AVX or not. * Updated PairHMM profiling code to update and print numbers only in single-thread mode * Edited README.md, pom.xml and Makefile for users to pass path to gcc 4.8.x if necessary * Moved all cpuid inline assembly to single function Changed info message to clog from cinfo * Modified version in pom.xml in VectorPairHMM from 3.1 to 3.2 * Deleted some unnecessary code * Modified C++ sandbox to print per interval timing 2014-03-18 02:42:19 +08:00			`#endif`
First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00
1. Integrated Mohammad's SSE4.2 code, Mustafa's bug fix and code to fix the SSE compilation warning. 2. Added code to dynamically select between AVX, SSE4.2 and normal C++ (in that order) 3. Created multiple files to compile with different compilation flags: avx_function_prototypes.cc is compiled with -xAVX while sse_function_instantiations.cc is compiled with -xSSE4.2 flag. 4. Added jniClose() and support in Java (HaplotypeCaller, PairHMMLikelihoodCalculationEngine) to call this function at the end of the program. 5. Removed debug code, kept assertions and profiling in C++ 6. Disabled OpenMP for now. 2014-01-21 00:03:42 +08:00			`typedef union __attribute__((aligned(32))) {`
			`ALIGNED __m128 ALIGNED d;`
			`ALIGNED __m64 ALIGNED s[2];`
			`ALIGNED float ALIGNED f[4];`
			`ALIGNED __m128i ALIGNED i;`
			`} ALIGNED mix_F128 ALIGNED;`

First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`typedef union ALIGNED {`
			`__m128i vec ;`
			`__m128 vecf ;`
			`uint32_t masks[4] ;`
			`} MaskVec_F ;`

1. Integrated Mohammad's SSE4.2 code, Mustafa's bug fix and code to fix the SSE compilation warning. 2. Added code to dynamically select between AVX, SSE4.2 and normal C++ (in that order) 3. Created multiple files to compile with different compilation flags: avx_function_prototypes.cc is compiled with -xAVX while sse_function_instantiations.cc is compiled with -xSSE4.2 flag. 4. Added jniClose() and support in Java (HaplotypeCaller, PairHMMLikelihoodCalculationEngine) to call this function at the end of the program. 5. Removed debug code, kept assertions and profiling in C++ 6. Disabled OpenMP for now. 2014-01-21 00:03:42 +08:00			`typedef union ALIGNED {`
			`__m64 vec ;`
			`__m64 vecf ;`
			`uint32_t masks[2] ;`
			`} MaskVec_F128 ;`

First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`typedef union ALIGNED`
			`{`
			`ALIGNED __m128i ALIGNED i;`
			`ALIGNED __m128 ALIGNED f;`
			`} ALIGNED IF_128f ALIGNED;`

			`typedef union ALIGNED`
			`{`
			`ALIGNED int ALIGNED i;`
			`ALIGNED float ALIGNED f;`
			`} ALIGNED IF_32 ALIGNED;`

Parallel version of the JNI for the PairHMM The JNI treats shared memory as critical memory and doesn't allow any parallel reads or writes to it until the native code finishes. This is not a problem per se it is the right thing to do, but we need to enable -nct when running the haplotype caller and with it have multiple native PairHMM running for each map call. Move to a copy based memory sharing where the JNI simply copies the memory over to C++ and then has no blocked critical memory when running, allowing -nct to work. This version is slightly (almost unnoticeably) slower with -nct 1, but scales better with -nct 2-4 (we haven't tested anything beyond that because we know the GATK falls apart with higher levels of parallelism * Make VECTOR_LOGLESS_CACHING the default implementation for PairHMM. * Changed version number in pom.xml under public/VectorPairHMM * VectorPairHMM can now be compiled using gcc 4.8.x * Modified define-* to get rid of gcc warnings for extra tokens after #undefs * Added a Linux kernel version check for AVX - gcc's __builtin_cpu_supports function does not check whether the kernel supports AVX or not. * Updated PairHMM profiling code to update and print numbers only in single-thread mode * Edited README.md, pom.xml and Makefile for users to pass path to gcc 4.8.x if necessary * Moved all cpuid inline assembly to single function Changed info message to clog from cinfo * Modified version in pom.xml in VectorPairHMM from 3.1 to 3.2 * Deleted some unnecessary code * Modified C++ sandbox to print per interval timing 2014-03-18 02:42:19 +08:00			`#ifdef SIMD_ENGINE_AVX`
First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`typedef union __attribute__((aligned(32))) {`
			`ALIGNED __m256d ALIGNED d;`
			`ALIGNED __m128i ALIGNED s[2];`
			`ALIGNED double ALIGNED f[4];`
			`ALIGNED __m256i ALIGNED i;`
			`} ALIGNED mix_D ALIGNED;`
Parallel version of the JNI for the PairHMM The JNI treats shared memory as critical memory and doesn't allow any parallel reads or writes to it until the native code finishes. This is not a problem per se it is the right thing to do, but we need to enable -nct when running the haplotype caller and with it have multiple native PairHMM running for each map call. Move to a copy based memory sharing where the JNI simply copies the memory over to C++ and then has no blocked critical memory when running, allowing -nct to work. This version is slightly (almost unnoticeably) slower with -nct 1, but scales better with -nct 2-4 (we haven't tested anything beyond that because we know the GATK falls apart with higher levels of parallelism * Make VECTOR_LOGLESS_CACHING the default implementation for PairHMM. * Changed version number in pom.xml under public/VectorPairHMM * VectorPairHMM can now be compiled using gcc 4.8.x * Modified define-* to get rid of gcc warnings for extra tokens after #undefs * Added a Linux kernel version check for AVX - gcc's __builtin_cpu_supports function does not check whether the kernel supports AVX or not. * Updated PairHMM profiling code to update and print numbers only in single-thread mode * Edited README.md, pom.xml and Makefile for users to pass path to gcc 4.8.x if necessary * Moved all cpuid inline assembly to single function Changed info message to clog from cinfo * Modified version in pom.xml in VectorPairHMM from 3.1 to 3.2 * Deleted some unnecessary code * Modified C++ sandbox to print per interval timing 2014-03-18 02:42:19 +08:00			`#endif`
First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00
1. Integrated Mohammad's SSE4.2 code, Mustafa's bug fix and code to fix the SSE compilation warning. 2. Added code to dynamically select between AVX, SSE4.2 and normal C++ (in that order) 3. Created multiple files to compile with different compilation flags: avx_function_prototypes.cc is compiled with -xAVX while sse_function_instantiations.cc is compiled with -xSSE4.2 flag. 4. Added jniClose() and support in Java (HaplotypeCaller, PairHMMLikelihoodCalculationEngine) to call this function at the end of the program. 5. Removed debug code, kept assertions and profiling in C++ 6. Disabled OpenMP for now. 2014-01-21 00:03:42 +08:00			`typedef union __attribute__((aligned(32))) {`
			`ALIGNED __m128d ALIGNED d;`
			`ALIGNED __m64 ALIGNED s[2];`
			`ALIGNED double ALIGNED f[2];`
			`ALIGNED __m128i ALIGNED i;`
			`} ALIGNED mix_D128 ALIGNED;`

First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`typedef union ALIGNED {`
			`__m128i vec ;`
			`__m128d vecf ;`
			`uint64_t masks[2] ;`
			`} MaskVec_D ;`

1. Integrated Mohammad's SSE4.2 code, Mustafa's bug fix and code to fix the SSE compilation warning. 2. Added code to dynamically select between AVX, SSE4.2 and normal C++ (in that order) 3. Created multiple files to compile with different compilation flags: avx_function_prototypes.cc is compiled with -xAVX while sse_function_instantiations.cc is compiled with -xSSE4.2 flag. 4. Added jniClose() and support in Java (HaplotypeCaller, PairHMMLikelihoodCalculationEngine) to call this function at the end of the program. 5. Removed debug code, kept assertions and profiling in C++ 6. Disabled OpenMP for now. 2014-01-21 00:03:42 +08:00			`typedef union ALIGNED {`
			`__m64 vec ;`
			`__m64 vecf ;`
			`uint64_t masks[1] ;`
			`} MaskVec_D128 ;`

First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00			`typedef union ALIGNED`
			`{`
			`ALIGNED __m128i ALIGNED i;`
			`ALIGNED __m128d ALIGNED f;`
			`} ALIGNED IF_128d ALIGNED;`

			`typedef union ALIGNED`
			`{`
			`ALIGNED int64_t ALIGNED i;`
			`ALIGNED double ALIGNED f;`
			`} ALIGNED IF_64 ALIGNED;`


Parallel version of the JNI for the PairHMM The JNI treats shared memory as critical memory and doesn't allow any parallel reads or writes to it until the native code finishes. This is not a problem per se it is the right thing to do, but we need to enable -nct when running the haplotype caller and with it have multiple native PairHMM running for each map call. Move to a copy based memory sharing where the JNI simply copies the memory over to C++ and then has no blocked critical memory when running, allowing -nct to work. This version is slightly (almost unnoticeably) slower with -nct 1, but scales better with -nct 2-4 (we haven't tested anything beyond that because we know the GATK falls apart with higher levels of parallelism * Make VECTOR_LOGLESS_CACHING the default implementation for PairHMM. * Changed version number in pom.xml under public/VectorPairHMM * VectorPairHMM can now be compiled using gcc 4.8.x * Modified define-* to get rid of gcc warnings for extra tokens after #undefs * Added a Linux kernel version check for AVX - gcc's __builtin_cpu_supports function does not check whether the kernel supports AVX or not. * Updated PairHMM profiling code to update and print numbers only in single-thread mode * Edited README.md, pom.xml and Makefile for users to pass path to gcc 4.8.x if necessary * Moved all cpuid inline assembly to single function Changed info message to clog from cinfo * Modified version in pom.xml in VectorPairHMM from 3.1 to 3.2 * Deleted some unnecessary code * Modified C++ sandbox to print per interval timing 2014-03-18 02:42:19 +08:00			`#include "common_data_structure.h"`
First import of AVX-JNI to git 2014-01-15 09:26:55 +08:00
			`#endif`