Merge pull request #573 from broadinstitute/intel_pairhmm
Intel pairhmm
This commit is contained in:
commit
587e81fbd9
|
|
@ -417,7 +417,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false)
|
||||
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING;
|
||||
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
|
||||
|
|
@ -639,6 +639,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
logger.info("Using global mismapping rate of " + phredScaledGlobalReadMismappingRate + " => " + log10GlobalReadMismappingRate + " in log10 likelihood units");
|
||||
}
|
||||
|
||||
//static member function - set number of threads
|
||||
PairHMM.setNumberOfThreads(getToolkit().getTotalNumberOfThreads());
|
||||
// create our likelihood calculation engine
|
||||
likelihoodCalculationEngine = createLikelihoodCalculationEngine();
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ import java.util.HashMap;
|
|||
*/
|
||||
public abstract class JNILoglessPairHMM extends LoglessPairHMM {
|
||||
public abstract HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap();
|
||||
protected long setupTime = 0;
|
||||
protected long threadLocalSetupTimeDiff = 0;
|
||||
protected static long pairHMMSetupTime = 0;
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
|||
|
||||
mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results
|
||||
if(doProfiling)
|
||||
setupTime += (System.nanoTime() - startTime);
|
||||
threadLocalSetupTimeDiff = (System.nanoTime() - startTime);
|
||||
//for(reads)
|
||||
// for(haplotypes)
|
||||
// compute_full_prob()
|
||||
|
|
@ -251,7 +251,14 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
|||
readIdx += numHaplotypes;
|
||||
}
|
||||
if(doProfiling)
|
||||
computeTime += (System.nanoTime() - startTime);
|
||||
{
|
||||
threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
|
||||
//synchronized(doProfiling)
|
||||
{
|
||||
pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff;
|
||||
pairHMMSetupTime += threadLocalSetupTimeDiff;
|
||||
}
|
||||
}
|
||||
return likelihoodMap;
|
||||
}
|
||||
|
||||
|
|
@ -262,7 +269,8 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
|||
@Override
|
||||
public void close()
|
||||
{
|
||||
System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9));
|
||||
if(doProfiling)
|
||||
System.out.println("Time spent in setup for JNI call : "+(pairHMMSetupTime*1e-9));
|
||||
super.close();
|
||||
jniClose();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
|
||||
|
||||
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("214799b5d1c0809f042c557155ea6e13"));
|
||||
specAnn.disableShadowBCF();
|
||||
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "d1626d5fff419b8a782de954224e881f");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
|
|||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("7592274ecd2b5ef4624dd2ed659536ef"));
|
||||
Arrays.asList("1b41bf83b9c249a9a5fffb1e308668c1"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,9 +44,10 @@ end of every region (line 351 in PairHMMLikelihoodCalculationEngine).
|
|||
Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java.
|
||||
|
||||
Compiling:
|
||||
Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX
|
||||
intrinsics.
|
||||
This native library is called libVectorLoglessPairHMM.so
|
||||
The native library (called libVectorLoglessPairHMM.so) can be compiled with icc (Intel C compiler)
|
||||
or gcc versions >= 4.8.1 that support AVX intrinsics. By default, the make process tries to invoke
|
||||
icc. To use gcc, edit the file 'pom.xml' (in this directory) and enable the environment variables
|
||||
USE_GCC,C_COMPILER and CPP_COMPILER (edit and uncomment lines 60-62).
|
||||
Using Maven:
|
||||
Type 'mvn install' in this directory - this will build the library (by invoking 'make') and copy the
|
||||
native library to the directory
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>org.broadinstitute.sting</groupId>
|
||||
<artifactId>sting-root</artifactId>
|
||||
<version>2.8-SNAPSHOT</version>
|
||||
<version>3.2-SNAPSHOT</version>
|
||||
<relativePath>../../public/sting-root</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
@ -57,6 +57,9 @@
|
|||
<workingDirectory>src/main/c++</workingDirectory>
|
||||
<environmentVariables>
|
||||
<JRE_HOME>${java.home}</JRE_HOME>
|
||||
<!--<USE_GCC>1</USE_GCC>-->
|
||||
<!--<C_COMPILER>/opt/gcc-4.8.2/bin/gcc</C_COMPILER>-->
|
||||
<!--<CPP_COMPILER>/opt/gcc-4.8.2/bin/g++</CPP_COMPILER>-->
|
||||
<OUTPUT_DIR>${project.build.directory}</OUTPUT_DIR>
|
||||
</environmentVariables>
|
||||
</configuration>
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "LoadTimeInitializer.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
using namespace std;
|
||||
char* LoadTimeInitializerStatsNames[] =
|
||||
{
|
||||
|
|
@ -43,6 +43,10 @@ LoadTimeInitializer g_load_time_initializer;
|
|||
|
||||
LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded
|
||||
{
|
||||
#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__INTEL_COMPILER)
|
||||
//compiles only with gcc >= 4.8
|
||||
__builtin_cpu_init();
|
||||
#endif
|
||||
ConvertChar::init();
|
||||
#ifndef DISABLE_FTZ
|
||||
//Very important to get good performance on Intel processors
|
||||
|
|
@ -71,11 +75,6 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
|||
m_filename_to_fptr.clear();
|
||||
m_written_files_set.clear();
|
||||
|
||||
//Common buffer - 8MB
|
||||
unsigned size = 1024*1024;
|
||||
m_buffer = new uint64_t[size];
|
||||
m_buffer_size = size*sizeof(uint64_t);
|
||||
|
||||
initialize_function_pointers();
|
||||
|
||||
//Initialize static members of class
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@
|
|||
#define LOAD_TIME_INITIALIZER_H
|
||||
#include "headers.h"
|
||||
#include <jni.h>
|
||||
#include "template.h"
|
||||
/*#include "template.h"*/
|
||||
|
||||
enum LoadTimeInitializerStatsEnum
|
||||
{
|
||||
|
|
@ -47,10 +47,6 @@ class LoadTimeInitializer
|
|||
{
|
||||
public:
|
||||
LoadTimeInitializer(); //will be called when library is loaded
|
||||
~LoadTimeInitializer()
|
||||
{
|
||||
delete[] m_buffer;
|
||||
}
|
||||
void print_profiling();
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
void debug_close();
|
||||
|
|
@ -72,8 +68,6 @@ class LoadTimeInitializer
|
|||
uint64_t m_data_transfer_time;
|
||||
//bytes copied
|
||||
uint64_t m_bytes_copied;
|
||||
unsigned get_buffer_size() { return m_buffer_size; }
|
||||
char* get_buffer() { return (char*)m_buffer; }
|
||||
private:
|
||||
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
||||
std::set<std::string> m_written_files_set;
|
||||
|
|
@ -83,8 +77,6 @@ class LoadTimeInitializer
|
|||
double m_sum_square_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
|
||||
unsigned m_buffer_size;
|
||||
uint64_t* m_buffer;
|
||||
};
|
||||
extern LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
|
|
|||
|
|
@ -32,14 +32,30 @@
|
|||
JRE_HOME?=/opt/jdk1.7.0_25/jre
|
||||
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux
|
||||
|
||||
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
CC=icc
|
||||
CXX=icc
|
||||
#COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -Wall $(OMPCFLAGS) -Wno-unknown-pragmas -Wno-write-strings -Wno-unused-variable -Wno-unused-but-set-variable
|
||||
ifdef DISABLE_FTZ
|
||||
COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ
|
||||
endif
|
||||
|
||||
ifdef USE_GCC
|
||||
C_COMPILER?=gcc
|
||||
CPP_COMPILER?=g++
|
||||
AVX_FLAGS=-mavx
|
||||
SSE41_FLAGS=-msse4.1
|
||||
COMMON_COMPILATION_FLAGS+=-Wno-char-subscripts
|
||||
else
|
||||
C_COMPILER?=icc
|
||||
CPP_COMPILER?=icc
|
||||
AVX_FLAGS=-xAVX
|
||||
SSE41_FLAGS=-xSSE4.1
|
||||
LIBFLAGS=-static-intel
|
||||
ifdef DISABLE_FTZ
|
||||
COMMON_COMPILATION_FLAGS+=-no-ftz
|
||||
endif
|
||||
endif
|
||||
|
||||
LDFLAGS=-lm -lrt $(OMPLDFLAGS)
|
||||
ifdef DISABLE_FTZ
|
||||
COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz
|
||||
endif
|
||||
|
||||
PAPI_DIR=/home/karthikg/softwares/papi-5.3.0
|
||||
ifdef USE_PAPI
|
||||
|
|
@ -49,10 +65,6 @@ ifdef USE_PAPI
|
|||
endif
|
||||
endif
|
||||
|
||||
ifdef DISABLE_FTZ
|
||||
COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz
|
||||
endif
|
||||
|
||||
BIN=libVectorLoglessPairHMM.so pairhmm-template-main checker
|
||||
#BIN=checker
|
||||
|
||||
|
|
@ -79,8 +91,8 @@ NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o)
|
|||
AVX_OBJECTS=$(AVX_SOURCES:.cc=.o)
|
||||
SSE_OBJECTS=$(SSE_SOURCES:.cc=.o)
|
||||
$(NO_VECTOR_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS)
|
||||
$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX
|
||||
$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2
|
||||
$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) $(AVX_FLAGS)
|
||||
$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) $(SSE41_FLAGS)
|
||||
OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS)
|
||||
|
||||
all: $(BIN) Sandbox.class copied_lib
|
||||
|
|
@ -88,18 +100,18 @@ all: $(BIN) Sandbox.class copied_lib
|
|||
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
|
||||
|
||||
checker: pairhmm-1-base.o $(COMMON_OBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
$(CPP_COMPILER) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
$(CPP_COMPILER) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
libVectorLoglessPairHMM.so: $(LIBOBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -shared -static-intel -o $@ $(LIBOBJECTS) ${LDFLAGS}
|
||||
$(CPP_COMPILER) $(OMPLFLAGS) -shared $(LIBFLAGS) -o $@ $(LIBOBJECTS) ${LDFLAGS}
|
||||
|
||||
|
||||
$(OBJECTS): %.o: %.cc
|
||||
@mkdir -p $(DEPDIR)
|
||||
$(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $<
|
||||
$(CPP_COMPILER) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $<
|
||||
|
||||
Sandbox.class: Sandbox.java
|
||||
javac Sandbox.java
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_SSE
|
||||
|
|
@ -31,6 +30,8 @@
|
|||
#define SIMD_ENGINE avx
|
||||
#define SIMD_ENGINE_AVX
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "common_data_structure.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
using namespace std;
|
||||
|
|
@ -48,17 +48,8 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
|||
//a straightforward array of pointers ensures that all data lies 'close' in memory, increasing
|
||||
//the chance of being stored together in the cache. Also, prefetchers can learn memory access
|
||||
//patterns for 2D arrays, not possible for array of pointers
|
||||
//bool locally_allocated = false;
|
||||
//NUMBER* common_buffer = 0;
|
||||
NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||
//unsigned curr_size = sizeof(NUMBER)*(3*ROWS*COLS + ROWS*6);
|
||||
//if(true)
|
||||
//{
|
||||
//common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||
//locally_allocated = true;
|
||||
//}
|
||||
//else
|
||||
//common_buffer = (NUMBER*)(g_load_time_initializer.get_buffer());
|
||||
//pointers to within the allocated buffer
|
||||
NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS];
|
||||
NUMBER* ptr = common_buffer;
|
||||
|
|
@ -154,7 +145,6 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
|||
|
||||
#ifndef USE_STACK_ALLOCATION
|
||||
delete[] common_pointer_buffer;
|
||||
//if(locally_allocated)
|
||||
delete[] common_buffer;
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,215 @@
|
|||
#ifndef COMMON_DATA_STRUCTURE_H
|
||||
#define COMMON_DATA_STRUCTURE_H
|
||||
|
||||
#include "headers.h"
|
||||
|
||||
#define CAT(X,Y) X####Y
|
||||
#define CONCAT(X,Y) CAT(X,Y)
|
||||
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
|
||||
//#define MROWS 500
|
||||
//#define MCOLS 1000
|
||||
|
||||
|
||||
#define MAX_QUAL 254
|
||||
#define MAX_JACOBIAN_TOLERANCE 8.0
|
||||
#define JACOBIAN_LOG_TABLE_STEP 0.0001
|
||||
#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
|
||||
#define MAXN 70000
|
||||
#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||
#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
|
||||
|
||||
template<class NUMBER>
|
||||
struct ContextBase
|
||||
{
|
||||
public:
|
||||
NUMBER ph2pr[128];
|
||||
NUMBER INITIAL_CONSTANT;
|
||||
NUMBER LOG10_INITIAL_CONSTANT;
|
||||
NUMBER RESULT_THRESHOLD;
|
||||
|
||||
static bool staticMembersInitializedFlag;
|
||||
static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||
static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||
|
||||
static void initializeStaticMembers()
|
||||
{
|
||||
if(!staticMembersInitializedFlag)
|
||||
{
|
||||
//Order of calls important - Jacobian first, then MatchToMatch
|
||||
initializeJacobianLogTable();
|
||||
initializeMatchToMatchProb();
|
||||
staticMembersInitializedFlag = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void deleteStaticMembers()
|
||||
{
|
||||
if(staticMembersInitializedFlag)
|
||||
{
|
||||
staticMembersInitializedFlag = false;
|
||||
}
|
||||
}
|
||||
|
||||
//Called only once during library load - don't bother to optimize with single precision fp
|
||||
static void initializeJacobianLogTable()
|
||||
{
|
||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)));
|
||||
}
|
||||
}
|
||||
|
||||
//Called only once per library load - don't bother optimizing with single fp
|
||||
static void initializeMatchToMatchProb()
|
||||
{
|
||||
double LN10 = log(10);
|
||||
double INV_LN10 = 1.0/LN10;
|
||||
for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i)
|
||||
for (int j = 0; j <= i; j++) {
|
||||
double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j);
|
||||
double matchToMatchLog10 =
|
||||
log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10;
|
||||
matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10));
|
||||
}
|
||||
}
|
||||
//Called during computation - use single precision where possible
|
||||
static int fastRound(NUMBER d) {
|
||||
return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5));
|
||||
}
|
||||
//Called during computation - use single precision where possible
|
||||
static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) {
|
||||
// make sure small is really the smaller value
|
||||
if (small > big) {
|
||||
NUMBER t = big;
|
||||
big = small;
|
||||
small = t;
|
||||
}
|
||||
|
||||
if (isinf(small) == -1 || isinf(big) == -1)
|
||||
return big;
|
||||
|
||||
NUMBER diff = big - small;
|
||||
if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE))
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding
|
||||
return big + jacobianLogTable[ind];
|
||||
}
|
||||
};
|
||||
|
||||
template<class NUMBER>
|
||||
struct Context : public ContextBase<NUMBER>
|
||||
{};
|
||||
|
||||
template<>
|
||||
struct Context<double> : public ContextBase<double>
|
||||
{
|
||||
Context():ContextBase<double>()
|
||||
{
|
||||
for (int x = 0; x < 128; x++)
|
||||
ph2pr[x] = pow(10.0, -((double)x) / 10.0);
|
||||
|
||||
INITIAL_CONSTANT = ldexp(1.0, 1020.0);
|
||||
LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
|
||||
RESULT_THRESHOLD = 0.0;
|
||||
}
|
||||
|
||||
double LOG10(double v){ return log10(v); }
|
||||
inline double POW(double b, double e) { return pow(b,e); }
|
||||
|
||||
static double _(double n){ return n; }
|
||||
static double _(float n){ return ((double) n); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Context<float> : public ContextBase<float>
|
||||
{
|
||||
Context() : ContextBase<float>()
|
||||
{
|
||||
for (int x = 0; x < 128; x++)
|
||||
{
|
||||
ph2pr[x] = powf(10.f, -((float)x) / 10.f);
|
||||
}
|
||||
|
||||
INITIAL_CONSTANT = ldexpf(1.f, 120.f);
|
||||
LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
|
||||
RESULT_THRESHOLD = ldexpf(1.f, -110.f);
|
||||
}
|
||||
|
||||
float LOG10(float v){ return log10f(v); }
|
||||
inline float POW(float b, float e) { return powf(b,e); }
|
||||
|
||||
static float _(double n){ return ((float) n); }
|
||||
static float _(float n){ return n; }
|
||||
};
|
||||
|
||||
#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \
|
||||
{ \
|
||||
int minQual = delQual; \
|
||||
int maxQual = insQual; \
|
||||
if (insQual <= delQual) \
|
||||
{ \
|
||||
minQual = insQual; \
|
||||
maxQual = delQual; \
|
||||
} \
|
||||
(output) = (MAX_QUAL < maxQual) ? \
|
||||
((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \
|
||||
: ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \
|
||||
}
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int rslen, haplen;
|
||||
/*int *q, *i, *d, *c;*/
|
||||
/*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/
|
||||
char *q, *i, *d, *c;
|
||||
char *hap, *rs;
|
||||
int *ihap;
|
||||
int *irs;
|
||||
} testcase;
|
||||
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
#define NUM_DISTINCT_CHARS 5
|
||||
#define AMBIG_CHAR 4
|
||||
|
||||
class ConvertChar {
|
||||
|
||||
static uint8_t conversionTable[255] ;
|
||||
|
||||
public:
|
||||
|
||||
static void init() {
|
||||
assert (NUM_DISTINCT_CHARS == 5) ;
|
||||
assert (AMBIG_CHAR == 4) ;
|
||||
|
||||
conversionTable['A'] = 0 ;
|
||||
conversionTable['C'] = 1 ;
|
||||
conversionTable['T'] = 2 ;
|
||||
conversionTable['G'] = 3 ;
|
||||
conversionTable['N'] = 4 ;
|
||||
}
|
||||
|
||||
static inline uint8_t get(uint8_t input) {
|
||||
return conversionTable[input] ;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
int normalize(char c);
|
||||
int read_testcase(testcase *tc, FILE* ifp=0);
|
||||
|
||||
#endif
|
||||
|
|
@ -42,33 +42,33 @@
|
|||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef SET_VEC_ZERO
|
||||
#undef VEC_OR
|
||||
#undef VEC_ADD
|
||||
#undef VEC_SUB
|
||||
#undef VEC_MUL
|
||||
#undef VEC_DIV
|
||||
#undef VEC_BLEND
|
||||
#undef VEC_BLENDV
|
||||
#undef VEC_CAST_256_128
|
||||
#undef VEC_EXTRACT_128
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_SET1_VAL128
|
||||
#undef VEC_MOVE
|
||||
#undef VEC_CAST_128_256
|
||||
#undef VEC_INSERT_VAL
|
||||
#undef VEC_CVT_128_256
|
||||
#undef VEC_SET1_VAL
|
||||
#undef VEC_POPCVT_CHAR
|
||||
#undef VEC_LDPOPCVT_CHAR
|
||||
#undef VEC_CMP_EQ
|
||||
#undef VEC_SET_LSE
|
||||
#undef SHIFT_HAP
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef VEC_SSE_TO_AVX
|
||||
#undef VEC_SHIFT_LEFT_1BIT
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef COMPARE_VECS
|
||||
#undef _256_INT_TYPE
|
||||
#undef BITMASK_VEC
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -42,33 +42,33 @@
|
|||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef SET_VEC_ZERO
|
||||
#undef VEC_OR
|
||||
#undef VEC_ADD
|
||||
#undef VEC_SUB
|
||||
#undef VEC_MUL
|
||||
#undef VEC_DIV
|
||||
#undef VEC_BLEND
|
||||
#undef VEC_BLENDV
|
||||
#undef VEC_CAST_256_128
|
||||
#undef VEC_EXTRACT_128
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_SET1_VAL128
|
||||
#undef VEC_MOVE
|
||||
#undef VEC_CAST_128_256
|
||||
#undef VEC_INSERT_VAL
|
||||
#undef VEC_CVT_128_256
|
||||
#undef VEC_SET1_VAL
|
||||
#undef VEC_POPCVT_CHAR
|
||||
#undef VEC_LDPOPCVT_CHAR
|
||||
#undef VEC_CMP_EQ
|
||||
#undef VEC_SET_LSE
|
||||
#undef SHIFT_HAP
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef VEC_SSE_TO_AVX
|
||||
#undef VEC_SHIFT_LEFT_1BIT
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef COMPARE_VECS
|
||||
#undef _256_INT_TYPE
|
||||
#undef BITMASK_VEC
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -40,35 +40,35 @@
|
|||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_INSERT_UNIT(__v1,__ins,__im)
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_INSERT_UNIT
|
||||
#undef SET_VEC_ZERO
|
||||
#undef VEC_OR
|
||||
#undef VEC_ADD
|
||||
#undef VEC_SUB
|
||||
#undef VEC_MUL
|
||||
#undef VEC_DIV
|
||||
#undef VEC_BLEND
|
||||
#undef VEC_BLENDV
|
||||
#undef VEC_CAST_256_128
|
||||
#undef VEC_EXTRACT_128
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_SET1_VAL128
|
||||
#undef VEC_MOVE
|
||||
#undef VEC_CAST_128_256
|
||||
#undef VEC_INSERT_VAL
|
||||
#undef VEC_CVT_128_256
|
||||
#undef VEC_SET1_VAL
|
||||
#undef VEC_POPCVT_CHAR
|
||||
#undef VEC_LDPOPCVT_CHAR
|
||||
#undef VEC_CMP_EQ
|
||||
#undef VEC_SET_LSE
|
||||
#undef SHIFT_HAP
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef VEC_SSE_TO_AVX
|
||||
#undef VEC_SHIFT_LEFT_1BIT
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef COMPARE_VECS
|
||||
#undef _256_INT_TYPE
|
||||
#undef BITMASK_VEC
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -40,35 +40,35 @@
|
|||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_INSERT_UNIT(__v1,__ins,__im)
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_INSERT_UNIT
|
||||
#undef SET_VEC_ZERO
|
||||
#undef VEC_OR
|
||||
#undef VEC_ADD
|
||||
#undef VEC_SUB
|
||||
#undef VEC_MUL
|
||||
#undef VEC_DIV
|
||||
#undef VEC_BLEND
|
||||
#undef VEC_BLENDV
|
||||
#undef VEC_CAST_256_128
|
||||
#undef VEC_EXTRACT_128
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_SET1_VAL128
|
||||
#undef VEC_MOVE
|
||||
#undef VEC_CAST_128_256
|
||||
#undef VEC_INSERT_VAL
|
||||
#undef VEC_CVT_128_256
|
||||
#undef VEC_SET1_VAL
|
||||
#undef VEC_POPCVT_CHAR
|
||||
#undef VEC_LDPOPCVT_CHAR
|
||||
#undef VEC_CMP_EQ
|
||||
#undef VEC_SET_LSE
|
||||
#undef SHIFT_HAP
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef VEC_SSE_TO_AVX
|
||||
#undef VEC_SHIFT_LEFT_1BIT
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef COMPARE_VECS
|
||||
#undef _256_INT_TYPE
|
||||
#undef BITMASK_VEC
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@
|
|||
/*#define DUMP_TO_SANDBOX 1*/
|
||||
|
||||
|
||||
#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1
|
||||
/*#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1*/
|
||||
|
||||
#ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY
|
||||
//Gets direct access to Java arrays
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
#include "headers.h"
|
||||
#include "jni_common.h"
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
#include "jnidebug.h"
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
#include "headers.h"
|
||||
#include "jni_common.h"
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h"
|
||||
#include "template.h"
|
||||
//#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ using namespace std;
|
|||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#define BATCH_SIZE 10000
|
||||
if(argc < 2)
|
||||
{
|
||||
cerr << "Needs path to input file as argument\n";
|
||||
|
|
|
|||
|
|
@ -24,12 +24,11 @@
|
|||
|
||||
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "vector_defs.h"
|
||||
|
||||
#define SIMD_ENGINE avx
|
||||
#define SIMD_ENGINE_AVX
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
|
|
@ -31,6 +30,8 @@
|
|||
#define SIMD_ENGINE sse
|
||||
#define SIMD_ENGINE_SSE
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#include "define-sse-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
|
|
|||
|
|
@ -28,27 +28,17 @@
|
|||
|
||||
#include "headers.h"
|
||||
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
|
||||
//#define MROWS 500
|
||||
//#define MCOLS 1000
|
||||
|
||||
#define CAT(X,Y) X####Y
|
||||
#define CONCAT(X,Y) CAT(X,Y)
|
||||
|
||||
#define ALIGNED __attribute__((aligned(32)))
|
||||
|
||||
#ifdef SIMD_ENGINE_AVX
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m256 ALIGNED d;
|
||||
ALIGNED __m128i ALIGNED s[2];
|
||||
ALIGNED float ALIGNED f[8];
|
||||
ALIGNED __m256i ALIGNED i;
|
||||
} ALIGNED mix_F ALIGNED;
|
||||
#endif
|
||||
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m128 ALIGNED d;
|
||||
|
|
@ -81,12 +71,14 @@ typedef union ALIGNED
|
|||
ALIGNED float ALIGNED f;
|
||||
} ALIGNED IF_32 ALIGNED;
|
||||
|
||||
#ifdef SIMD_ENGINE_AVX
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m256d ALIGNED d;
|
||||
ALIGNED __m128i ALIGNED s[2];
|
||||
ALIGNED double ALIGNED f[4];
|
||||
ALIGNED __m256i ALIGNED i;
|
||||
} ALIGNED mix_D ALIGNED;
|
||||
#endif
|
||||
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m128d ALIGNED d;
|
||||
|
|
@ -120,200 +112,7 @@ typedef union ALIGNED
|
|||
} ALIGNED IF_64 ALIGNED;
|
||||
|
||||
|
||||
#define MAX_QUAL 254
|
||||
#define MAX_JACOBIAN_TOLERANCE 8.0
|
||||
#define JACOBIAN_LOG_TABLE_STEP 0.0001
|
||||
#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
|
||||
#define MAXN 70000
|
||||
#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||
#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
|
||||
|
||||
template<class NUMBER>
|
||||
struct ContextBase
|
||||
{
|
||||
public:
|
||||
NUMBER ph2pr[128];
|
||||
NUMBER INITIAL_CONSTANT;
|
||||
NUMBER LOG10_INITIAL_CONSTANT;
|
||||
NUMBER RESULT_THRESHOLD;
|
||||
|
||||
static bool staticMembersInitializedFlag;
|
||||
static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||
static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||
|
||||
static void initializeStaticMembers()
|
||||
{
|
||||
if(!staticMembersInitializedFlag)
|
||||
{
|
||||
//Order of calls important - Jacobian first, then MatchToMatch
|
||||
initializeJacobianLogTable();
|
||||
initializeMatchToMatchProb();
|
||||
staticMembersInitializedFlag = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void deleteStaticMembers()
|
||||
{
|
||||
if(staticMembersInitializedFlag)
|
||||
{
|
||||
staticMembersInitializedFlag = false;
|
||||
}
|
||||
}
|
||||
|
||||
//Called only once during library load - don't bother to optimize with single precision fp
|
||||
static void initializeJacobianLogTable()
|
||||
{
|
||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)));
|
||||
}
|
||||
}
|
||||
|
||||
//Called only once per library load - don't bother optimizing with single fp
|
||||
static void initializeMatchToMatchProb()
|
||||
{
|
||||
double LN10 = log(10);
|
||||
double INV_LN10 = 1.0/LN10;
|
||||
for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i)
|
||||
for (int j = 0; j <= i; j++) {
|
||||
double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j);
|
||||
double matchToMatchLog10 =
|
||||
log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10;
|
||||
matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10));
|
||||
}
|
||||
}
|
||||
//Called during computation - use single precision where possible
|
||||
static int fastRound(NUMBER d) {
|
||||
return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5));
|
||||
}
|
||||
//Called during computation - use single precision where possible
|
||||
static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) {
|
||||
// make sure small is really the smaller value
|
||||
if (small > big) {
|
||||
NUMBER t = big;
|
||||
big = small;
|
||||
small = t;
|
||||
}
|
||||
|
||||
if (isinf(small) == -1 || isinf(big) == -1)
|
||||
return big;
|
||||
|
||||
NUMBER diff = big - small;
|
||||
if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE))
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding
|
||||
return big + jacobianLogTable[ind];
|
||||
}
|
||||
};
|
||||
|
||||
template<class NUMBER>
|
||||
struct Context : public ContextBase<NUMBER>
|
||||
{};
|
||||
|
||||
template<>
|
||||
struct Context<double> : public ContextBase<double>
|
||||
{
|
||||
Context():ContextBase<double>()
|
||||
{
|
||||
for (int x = 0; x < 128; x++)
|
||||
ph2pr[x] = pow(10.0, -((double)x) / 10.0);
|
||||
|
||||
INITIAL_CONSTANT = ldexp(1.0, 1020.0);
|
||||
LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
|
||||
RESULT_THRESHOLD = 0.0;
|
||||
}
|
||||
|
||||
double LOG10(double v){ return log10(v); }
|
||||
inline double POW(double b, double e) { return pow(b,e); }
|
||||
|
||||
static double _(double n){ return n; }
|
||||
static double _(float n){ return ((double) n); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Context<float> : public ContextBase<float>
|
||||
{
|
||||
Context() : ContextBase<float>()
|
||||
{
|
||||
for (int x = 0; x < 128; x++)
|
||||
{
|
||||
ph2pr[x] = powf(10.f, -((float)x) / 10.f);
|
||||
}
|
||||
|
||||
INITIAL_CONSTANT = ldexpf(1.f, 120.f);
|
||||
LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
|
||||
RESULT_THRESHOLD = ldexpf(1.f, -110.f);
|
||||
}
|
||||
|
||||
float LOG10(float v){ return log10f(v); }
|
||||
inline float POW(float b, float e) { return powf(b,e); }
|
||||
|
||||
static float _(double n){ return ((float) n); }
|
||||
static float _(float n){ return n; }
|
||||
};
|
||||
|
||||
#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \
|
||||
{ \
|
||||
int minQual = delQual; \
|
||||
int maxQual = insQual; \
|
||||
if (insQual <= delQual) \
|
||||
{ \
|
||||
minQual = insQual; \
|
||||
maxQual = delQual; \
|
||||
} \
|
||||
(output) = (MAX_QUAL < maxQual) ? \
|
||||
((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \
|
||||
: ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int rslen, haplen;
|
||||
/*int *q, *i, *d, *c;*/
|
||||
/*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/
|
||||
char *q, *i, *d, *c;
|
||||
char *hap, *rs;
|
||||
int *ihap;
|
||||
int *irs;
|
||||
} testcase;
|
||||
|
||||
int normalize(char c);
|
||||
int read_testcase(testcase *tc, FILE* ifp=0);
|
||||
|
||||
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
#define NUM_DISTINCT_CHARS 5
|
||||
#define AMBIG_CHAR 4
|
||||
|
||||
class ConvertChar {
|
||||
|
||||
static uint8_t conversionTable[255] ;
|
||||
|
||||
public:
|
||||
|
||||
static void init() {
|
||||
assert (NUM_DISTINCT_CHARS == 5) ;
|
||||
assert (AMBIG_CHAR == 4) ;
|
||||
|
||||
conversionTable['A'] = 0 ;
|
||||
conversionTable['C'] = 1 ;
|
||||
conversionTable['T'] = 2 ;
|
||||
conversionTable['G'] = 3 ;
|
||||
conversionTable['N'] = 4 ;
|
||||
}
|
||||
|
||||
static inline uint8_t get(uint8_t input) {
|
||||
return conversionTable[input] ;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#include "common_data_structure.h"
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -22,11 +22,8 @@
|
|||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "vector_defs.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
using namespace std;
|
||||
|
||||
|
|
@ -36,51 +33,107 @@ uint8_t ConvertChar::conversionTable[255];
|
|||
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
||||
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
|
||||
//Static members in ContextBase
|
||||
template<>
|
||||
bool ContextBase<double>::staticMembersInitializedFlag = false;
|
||||
double ContextBase<double>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||
double ContextBase<double>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||
template<>
|
||||
double ContextBase<double>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE] = { };
|
||||
template<>
|
||||
double ContextBase<double>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1] = { };
|
||||
template<>
|
||||
bool ContextBase<float>::staticMembersInitializedFlag = false;
|
||||
float ContextBase<float>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||
float ContextBase<float>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||
template<>
|
||||
float ContextBase<float>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE] = { };
|
||||
template<>
|
||||
float ContextBase<float>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1] = { };
|
||||
|
||||
|
||||
bool search_file_for_string(string filename, string search_string)
|
||||
{
|
||||
ifstream fptr;
|
||||
fptr.open(filename.c_str(),ios::in);
|
||||
if(fptr.is_open())
|
||||
{
|
||||
string buffer;
|
||||
buffer.clear();
|
||||
buffer.resize(4096);
|
||||
bool retvalue = false;
|
||||
while(!fptr.eof())
|
||||
{
|
||||
fptr.getline(&(buffer[0]), 4096);
|
||||
if(buffer.find(search_string) != string::npos) //found string
|
||||
{
|
||||
retvalue = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
fptr.close();
|
||||
return retvalue;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_cpuid_ecx_bit_set(int eax, int bitidx)
|
||||
{
|
||||
int ecx = 0, edx = 0, ebx = 0;
|
||||
__asm__ ("cpuid"
|
||||
:"=b" (ebx),
|
||||
"=c" (ecx),
|
||||
"=d" (edx)
|
||||
:"a" (eax)
|
||||
);
|
||||
return (((ecx >> bitidx)&1) == 1);
|
||||
}
|
||||
|
||||
bool is_avx_supported()
|
||||
{
|
||||
return (_may_i_use_cpu_feature(_FEATURE_AVX) > 0);
|
||||
//int ecx = 0, edx = 0, ebx = 0;
|
||||
//__asm__("cpuid"
|
||||
//: "=b" (ebx),
|
||||
//"=c" (ecx),
|
||||
//"=d" (edx)
|
||||
//: "a" (1)
|
||||
//);
|
||||
//return ((ecx >> 28)&1) == 1;
|
||||
#ifdef __INTEL_COMPILER
|
||||
bool use_avx = _may_i_use_cpu_feature(_FEATURE_AVX);
|
||||
if(use_avx)
|
||||
return true;
|
||||
else
|
||||
{
|
||||
//check if core supports AVX, but kernel does not and print info message
|
||||
if(!is_cpuid_ecx_bit_set(1, 28)) //core does not support AVX
|
||||
return false;
|
||||
//else fall through to end of function
|
||||
}
|
||||
#else
|
||||
if(!__builtin_cpu_supports("avx")) //core does not support AVX
|
||||
return false;
|
||||
else
|
||||
{
|
||||
//core supports AVX, check if kernel supports
|
||||
if(search_file_for_string("/proc/cpuinfo","avx"))
|
||||
return true;
|
||||
//else fall through to end of function
|
||||
|
||||
}
|
||||
#endif //__INTEL_COMPILER
|
||||
clog << "INFO: Your CPU supports AVX vector instructions, but your kernel does not. Try upgrading to a kernel that supports AVX.\n";
|
||||
clog << "INFO: Your program will run correctly, but slower than the AVX version\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_sse41_supported()
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
return (_may_i_use_cpu_feature(_FEATURE_SSE4_1) > 0);
|
||||
//int ecx = 0, edx = 0, ebx = 0;
|
||||
//__asm__("cpuid"
|
||||
//: "=b" (ebx),
|
||||
//"=c" (ecx),
|
||||
//"=d" (edx)
|
||||
//: "a" (1)
|
||||
//);
|
||||
//return ((ecx >> 19)&1) == 1;
|
||||
#else
|
||||
return __builtin_cpu_supports("sse4.1");
|
||||
#endif
|
||||
//return is_cpuid_ecx_bit_set(1, 19);
|
||||
}
|
||||
|
||||
bool is_sse42_supported()
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
return (_may_i_use_cpu_feature(_FEATURE_SSE4_2) > 0);
|
||||
//int ecx = 0, edx = 0, ebx = 0;
|
||||
//__asm__("cpuid"
|
||||
//: "=b" (ebx),
|
||||
//"=c" (ecx),
|
||||
//"=d" (edx)
|
||||
//: "a" (1)
|
||||
//);
|
||||
//return ((ecx >> 20)&1) == 1;
|
||||
#else
|
||||
return __builtin_cpu_supports("sse4.2");
|
||||
#endif
|
||||
//return is_cpuid_ecx_bit_set(1, 20);
|
||||
}
|
||||
|
||||
uint64_t get_machine_capabilities()
|
||||
|
|
@ -154,10 +207,14 @@ int read_testcase(testcase *tc, FILE* ifp)
|
|||
|
||||
tc->haplen = strlen(tc->hap);
|
||||
tc->rslen = strlen(tc->rs);
|
||||
assert(strlen(q) == tc->rslen);
|
||||
assert(strlen(i) == tc->rslen);
|
||||
assert(strlen(d) == tc->rslen);
|
||||
assert(strlen(c) == tc->rslen);
|
||||
assert(strlen(q) == (size_t)tc->rslen);
|
||||
assert(strlen(i) == (size_t)tc->rslen);
|
||||
assert(strlen(d) == (size_t)tc->rslen);
|
||||
assert(strlen(c) == (size_t)tc->rslen);
|
||||
|
||||
g_load_time_initializer.update_stat(READ_LENGTH_IDX, tc->rslen);
|
||||
g_load_time_initializer.update_stat(HAPLOTYPE_LENGTH_IDX, tc->haplen);
|
||||
g_load_time_initializer.update_stat(PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, tc->haplen*tc->rslen);
|
||||
//assert(tc->rslen < MROWS);
|
||||
//tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
|
||||
//tc->irs = (int *) malloc(tc->rslen*sizeof(int));
|
||||
|
|
@ -279,15 +336,15 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
|||
tc->c = new char[tc->rslen];
|
||||
//cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
|
||||
memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
|
||||
assert(tokens.size() == 2 + 4*(tc->rslen));
|
||||
assert(tokens.size() == (size_t)(2 + 4*(tc->rslen)));
|
||||
//assert(tc->rslen < MROWS);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]);
|
||||
|
||||
if(reformat)
|
||||
|
|
@ -297,16 +354,16 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
|||
assert(ofptr.is_open());
|
||||
ofptr << tokens[0] << " ";
|
||||
ofptr << tokens[1] << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->q[j]+33));
|
||||
ofptr << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->i[j]+33));
|
||||
ofptr << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->d[j]+33));
|
||||
ofptr << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
for(int j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->c[j]+33));
|
||||
ofptr << " 0 false\n";
|
||||
|
||||
|
|
@ -363,6 +420,10 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
ifptr.open(filename);
|
||||
assert(ifptr.is_open());
|
||||
}
|
||||
#ifdef PRINT_PER_INTERVAL_TIMINGS
|
||||
ofstream times_fptr;
|
||||
times_fptr.open("native_timed_intervals.csv",ios::out);
|
||||
#endif
|
||||
vector<testcase> tc_vector;
|
||||
tc_vector.clear();
|
||||
testcase tc;
|
||||
|
|
@ -377,11 +438,11 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode
|
||||
PAPI_num_counters();
|
||||
int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||
char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "itlb_walk_cycles", "dtlb_load_walk_cycles", "dtlb_store_walk_cycles" };
|
||||
char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "l1_pending_miss", "lfb_hit", "l2_hit" };
|
||||
assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("ITLB_MISSES:WALK_DURATION", &(events[1])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("DTLB_LOAD_MISSES:WALK_DURATION", &(events[2])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("DTLB_STORE_MISSES:WALK_DURATION", &(events[3])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("L1D_PEND_MISS:OCCURRENCES", &(events[1])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("MEM_LOAD_UOPS_RETIRED:HIT_LFB", &(events[2])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("MEM_LOAD_UOPS_RETIRED:L2_HIT", &(events[3])) == PAPI_OK);
|
||||
long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||
long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||
#endif
|
||||
|
|
@ -398,6 +459,9 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
baseline_results_vec.clear();
|
||||
results_vec.resize(tc_vector.size());
|
||||
baseline_results_vec.resize(tc_vector.size());
|
||||
g_load_time_initializer.update_stat(NUM_TESTCASES_IDX, tc_vector.size());
|
||||
g_load_time_initializer.update_stat(NUM_READS_IDX, tc_vector.size());
|
||||
g_load_time_initializer.update_stat(NUM_HAPLOTYPES_IDX, tc_vector.size());
|
||||
struct timespec start_time;
|
||||
#ifdef USE_PAPI
|
||||
assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK);
|
||||
|
|
@ -429,7 +493,11 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
#ifdef USE_PAPI
|
||||
assert(PAPI_stop_counters(values, NUM_PAPI_COUNTERS) == PAPI_OK);
|
||||
#endif
|
||||
vector_compute_time += diff_time(start_time);
|
||||
uint64_t curr_interval = diff_time(start_time);
|
||||
#ifdef PRINT_PER_INTERVAL_TIMINGS
|
||||
times_fptr << curr_interval << "\n";
|
||||
#endif
|
||||
vector_compute_time += curr_interval;
|
||||
#ifdef USE_PAPI
|
||||
for(unsigned k=0;k<NUM_PAPI_COUNTERS;++k)
|
||||
accum_values[k] += values[k];
|
||||
|
|
@ -488,9 +556,12 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
for(unsigned i=0;i<NUM_PAPI_COUNTERS;++i)
|
||||
cout << eventnames[i] << " : "<<accum_values[i]<<"\n";
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_PER_INTERVAL_TIMINGS
|
||||
times_fptr.close();
|
||||
#endif
|
||||
if(use_old_read_testcase)
|
||||
fclose(fptr);
|
||||
else
|
||||
ifptr.close();
|
||||
g_load_time_initializer.print_profiling();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
#ifndef PAIRHMM_UTIL_H
|
||||
#define PAIRHMM_UTIL_H
|
||||
|
||||
#include "template.h"
|
||||
#include "common_data_structure.h"
|
||||
|
||||
template<class T>
|
||||
std::string to_string(T obj)
|
||||
|
|
@ -50,6 +50,15 @@ extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_lo
|
|||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob_avxd(testcase *tc, NUMBER *before_last_log=0);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob_avxs(testcase *tc, NUMBER *before_last_log=0);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob_ssed(testcase *tc, NUMBER *before_last_log=0);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob_sses(testcase *tc, NUMBER *before_last_log=0);
|
||||
|
||||
double getCurrClk();
|
||||
void get_time(struct timespec* x);
|
||||
uint64_t diff_time(struct timespec& prev_time);
|
||||
|
|
@ -71,5 +80,6 @@ void do_compute(char* filename, bool use_old_read_testcase=true, unsigned chunk_
|
|||
/*#define DUMP_COMPUTE_VALUES 1*/
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
/*#define PRINT_PER_INTERVAL_TIMINGS 1*/
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,55 +0,0 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
#undef SIMD_ENGINE_SSE
|
||||
|
||||
#define SIMD_ENGINE avx
|
||||
#define SIMD_ENGINE_AVX
|
||||
|
||||
#include "define-float.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
|
||||
#define SIMD_ENGINE sse
|
||||
#define SIMD_ENGINE_SSE
|
||||
|
||||
|
||||
#include "define-sse-float.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#include "define-sse-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
#undef SIMD_ENGINE_SSE
|
||||
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
inline void CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
inline void CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
inline void CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
inline void CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, SIMD_TYPE& distm, SIMD_TYPE& _1_distm, SIMD_TYPE& distmChosen);
|
||||
template<class NUMBER> inline void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, SIMD_TYPE *p_MM, SIMD_TYPE *p_GAPM, SIMD_TYPE *p_MX, SIMD_TYPE *p_XX, SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, SIMD_TYPE *distm1D);
|
||||
template<class NUMBER> inline void CONCAT(CONCAT(stripINITIALIZATION,SIMD_ENGINE), PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, SIMD_TYPE &pGAPM, SIMD_TYPE &pMM, SIMD_TYPE &pMX, SIMD_TYPE &pXX, SIMD_TYPE &pMY, SIMD_TYPE &pYY,
|
||||
SIMD_TYPE &rs, UNION_TYPE &rsN, SIMD_TYPE &distm, SIMD_TYPE &_1_distm, SIMD_TYPE *distm1D, SIMD_TYPE N_packed256, SIMD_TYPE *p_MM , SIMD_TYPE *p_GAPM ,
|
||||
SIMD_TYPE *p_MX, SIMD_TYPE *p_XX , SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
|
||||
inline SIMD_TYPE CONCAT(CONCAT(computeDISTM,SIMD_ENGINE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, SIMD_TYPE rs, UNION_TYPE rsN, SIMD_TYPE N_packed256,
|
||||
SIMD_TYPE distm, SIMD_TYPE _1_distm);
|
||||
inline void CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
SIMD_TYPE pMM, SIMD_TYPE pGAPM, SIMD_TYPE pMX, SIMD_TYPE pXX, SIMD_TYPE pMY, SIMD_TYPE pYY, SIMD_TYPE distmSel);
|
||||
template<class NUMBER> NUMBER CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);
|
||||
|
||||
|
|
@ -78,8 +78,9 @@ public abstract class PairHMM {
|
|||
protected double[] mLikelihoodArray;
|
||||
|
||||
//profiling information
|
||||
protected static final boolean doProfiling = true;
|
||||
protected long computeTime = 0;
|
||||
protected static Boolean doProfiling = true;
|
||||
protected static long pairHMMComputeTime = 0;
|
||||
protected long threadLocalPairHMMComputeTimeDiff = 0;
|
||||
protected long startTime = 0;
|
||||
|
||||
/**
|
||||
|
|
@ -207,7 +208,13 @@ public abstract class PairHMM {
|
|||
}
|
||||
}
|
||||
if(doProfiling)
|
||||
computeTime += (System.nanoTime() - startTime);
|
||||
{
|
||||
threadLocalPairHMMComputeTimeDiff = (System.nanoTime() - startTime);
|
||||
//synchronized(doProfiling)
|
||||
{
|
||||
pairHMMComputeTime += threadLocalPairHMMComputeTimeDiff;
|
||||
}
|
||||
}
|
||||
return likelihoodMap;
|
||||
}
|
||||
|
||||
|
|
@ -314,6 +321,17 @@ public abstract class PairHMM {
|
|||
return Math.min(haplotype1.length, haplotype2.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use number of threads to set doProfiling flag - doProfiling iff numThreads == 1
|
||||
* This function should be called only during initialization phase - single thread phase of HC
|
||||
*/
|
||||
public static void setNumberOfThreads(final int numThreads)
|
||||
{
|
||||
doProfiling = (numThreads == 1);
|
||||
if(numThreads > 1)
|
||||
logger.info("Performance profiling for PairHMM is disabled because HaplotypeCaller is being run with multiple threads (-nct>1) option\nProfiling is enabled only when running in single thread mode\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the results of the computeLikelihoods function
|
||||
*/
|
||||
|
|
@ -324,6 +342,6 @@ public abstract class PairHMM {
|
|||
public void close()
|
||||
{
|
||||
if(doProfiling)
|
||||
System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(computeTime*1e-9));
|
||||
System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(pairHMMComputeTime*1e-9));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in New Issue