1. Changed logger.info to logger.warn in PairHMMLikelihoodCalculationEngine.java
2. Committing the right set of files after rebase
This commit is contained in:
parent
37526dfad5
commit
1b395a871a
|
|
@ -98,7 +98,7 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
||||||
}
|
}
|
||||||
catch(UnsatisfiedLinkError ule)
|
catch(UnsatisfiedLinkError ule)
|
||||||
{
|
{
|
||||||
logger.info("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
|
logger.warn("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
|
||||||
return new LoglessPairHMM();
|
return new LoglessPairHMM();
|
||||||
}
|
}
|
||||||
case DEBUG_VECTOR_LOGLESS_CACHING:
|
case DEBUG_VECTOR_LOGLESS_CACHING:
|
||||||
|
|
|
||||||
|
|
@ -449,21 +449,7 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||||
})
|
})
|
||||||
@Ensures("constantsAreInitialized")
|
@Ensures("constantsAreInitialized")
|
||||||
protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
|
protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
|
||||||
for (int i = 0; i < insertionGOP.length; i++) {
|
PairHMMModel.qualToTransProbs(transition,insertionGOP,deletionGOP,overallGCP);
|
||||||
final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
|
|
||||||
transition[i+1][matchToMatch] = QualityUtils.qualToProb((byte) qualIndexGOP);
|
|
||||||
transition[i+1][indelToMatch] = QualityUtils.qualToProb(overallGCP[i]);
|
|
||||||
transition[i+1][matchToInsertion] = QualityUtils.qualToErrorProb(insertionGOP[i]);
|
|
||||||
transition[i+1][insertionToInsertion] = QualityUtils.qualToErrorProb(overallGCP[i]);
|
|
||||||
transition[i+1][matchToDeletion] = QualityUtils.qualToErrorProb(deletionGOP[i]);
|
|
||||||
transition[i+1][deletionToDeletion] = QualityUtils.qualToErrorProb(overallGCP[i]);
|
|
||||||
|
|
||||||
//TODO it seems that it is not always the case that matchToMatch + matchToDeletion + matchToInsertion == 1.
|
|
||||||
//TODO We have detected cases of 1.00002 which can cause problems downstream. This are typically masked
|
|
||||||
//TODO by the fact that we always add a indelToMatch penalty to all PairHMM likelihoods (~ -0.1)
|
|
||||||
//TODO This is in fact not well justified and although it does not have any effect (since is equally added to all
|
|
||||||
//TODO haplotypes likelihoods) perhaps we should just remove it eventually and fix this != 1.0 issue here.
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,6 @@ char* LoadTimeInitializerStatsNames[] =
|
||||||
"dummy"
|
"dummy"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
LoadTimeInitializer g_load_time_initializer;
|
LoadTimeInitializer g_load_time_initializer;
|
||||||
|
|
||||||
LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded
|
LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded
|
||||||
|
|
@ -54,6 +53,10 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
||||||
|
|
||||||
initialize_function_pointers();
|
initialize_function_pointers();
|
||||||
|
|
||||||
|
//Initialize static members of class
|
||||||
|
Context<float>::initializeStaticMembers();
|
||||||
|
Context<double>::initializeStaticMembers();
|
||||||
|
|
||||||
cout.flush();
|
cout.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,10 @@ class LoadTimeInitializer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
LoadTimeInitializer(); //will be called when library is loaded
|
LoadTimeInitializer(); //will be called when library is loaded
|
||||||
~LoadTimeInitializer() { delete m_buffer; }
|
~LoadTimeInitializer()
|
||||||
|
{
|
||||||
|
delete m_buffer;
|
||||||
|
}
|
||||||
void print_profiling();
|
void print_profiling();
|
||||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||||
void debug_close();
|
void debug_close();
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||||
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||||
|
|
||||||
JRE_HOME?=/opt/jdk1.7.0_25/
|
JRE_HOME?=/opt/jdk1.7.0_25/jre
|
||||||
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux
|
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux
|
||||||
|
|
||||||
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,8 @@
|
||||||
#include "template.h"
|
#include "template.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "LoadTimeInitializer.h"
|
#include "LoadTimeInitializer.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
template<class NUMBER>
|
template<class NUMBER>
|
||||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||||
{
|
{
|
||||||
|
|
@ -62,7 +62,8 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||||
int _i = tc->i[r-1] & 127;
|
int _i = tc->i[r-1] & 127;
|
||||||
int _d = tc->d[r-1] & 127;
|
int _d = tc->d[r-1] & 127;
|
||||||
int _c = tc->c[r-1] & 127;
|
int _c = tc->c[r-1] & 127;
|
||||||
p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
//p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||||
|
SET_MATCH_TO_MATCH_PROB(p[r][MM], _i, _d);
|
||||||
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
|
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
|
||||||
p[r][MX] = ctx.ph2pr[_i];
|
p[r][MX] = ctx.ph2pr[_i];
|
||||||
p[r][XX] = ctx.ph2pr[_c];
|
p[r][XX] = ctx.ph2pr[_c];
|
||||||
|
|
|
||||||
|
|
@ -116,7 +116,8 @@ template<class NUMBER> void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECIS
|
||||||
int _d = tc->d[r-1] & 127;
|
int _d = tc->d[r-1] & 127;
|
||||||
int _c = tc->c[r-1] & 127;
|
int _c = tc->c[r-1] & 127;
|
||||||
|
|
||||||
*(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
//*(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||||
|
SET_MATCH_TO_MATCH_PROB(*(ptr_p_MM+r-1), _i, _d);
|
||||||
*(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
|
*(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
|
||||||
*(ptr_p_MX+r-1) = ctx.ph2pr[_i];
|
*(ptr_p_MX+r-1) = ctx.ph2pr[_i];
|
||||||
*(ptr_p_XX+r-1) = ctx.ph2pr[_c];
|
*(ptr_p_XX+r-1) = ctx.ph2pr[_c];
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,8 @@ then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
#-Djava.library.path is needed if you wish to override the default 'packed' library
|
#-Djava.library.path is needed if you wish to override the default 'packed' library
|
||||||
#java -Djava.library.path=${GSA_ROOT_DIR}/public/VectorPairHMM/src/main/c++ -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
#java -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
||||||
java -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
java -Djava.library.path=${GSA_ROOT_DIR}/public/VectorPairHMM/src/main/c++ -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
||||||
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
|
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
|
||||||
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
||||||
-I /data/simulated/sim1M_pairs_final.bam \
|
-I /data/simulated/sim1M_pairs_final.bam \
|
||||||
|
|
|
||||||
|
|
@ -94,57 +94,158 @@ typedef union ALIGNED
|
||||||
ALIGNED double ALIGNED f;
|
ALIGNED double ALIGNED f;
|
||||||
} ALIGNED IF_64 ALIGNED;
|
} ALIGNED IF_64 ALIGNED;
|
||||||
|
|
||||||
template<class T>
|
|
||||||
struct Context{};
|
#define MAX_QUAL 254
|
||||||
|
#define MAX_JACOBIAN_TOLERANCE 8.0
|
||||||
|
#define JACOBIAN_LOG_TABLE_STEP 0.0001
|
||||||
|
#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
|
||||||
|
#define MAXN 70000
|
||||||
|
#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients
|
||||||
|
#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
|
||||||
|
|
||||||
|
template<class NUMBER>
|
||||||
|
struct ContextBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
NUMBER ph2pr[128];
|
||||||
|
NUMBER INITIAL_CONSTANT;
|
||||||
|
NUMBER LOG10_INITIAL_CONSTANT;
|
||||||
|
NUMBER RESULT_THRESHOLD;
|
||||||
|
|
||||||
|
static bool staticMembersInitializedFlag;
|
||||||
|
static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||||
|
static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||||
|
|
||||||
|
static void initializeStaticMembers()
|
||||||
|
{
|
||||||
|
if(!staticMembersInitializedFlag)
|
||||||
|
{
|
||||||
|
//Order of calls important - Jacobian first, then MatchToMatch
|
||||||
|
initializeJacobianLogTable();
|
||||||
|
initializeMatchToMatchProb();
|
||||||
|
staticMembersInitializedFlag = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void deleteStaticMembers()
|
||||||
|
{
|
||||||
|
if(staticMembersInitializedFlag)
|
||||||
|
{
|
||||||
|
staticMembersInitializedFlag = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Called only once during library load - don't bother to optimize with single precision fp
|
||||||
|
static void initializeJacobianLogTable()
|
||||||
|
{
|
||||||
|
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||||
|
jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Called only once per library load - don't bother optimizing with single fp
|
||||||
|
static void initializeMatchToMatchProb()
|
||||||
|
{
|
||||||
|
double LN10 = log(10);
|
||||||
|
double INV_LN10 = 1.0/LN10;
|
||||||
|
for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i)
|
||||||
|
for (int j = 0; j <= i; j++) {
|
||||||
|
double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j);
|
||||||
|
double matchToMatchLog10 =
|
||||||
|
log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10;
|
||||||
|
matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Called during computation - use single precision where possible
|
||||||
|
static int fastRound(NUMBER d) {
|
||||||
|
return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5));
|
||||||
|
}
|
||||||
|
//Called during computation - use single precision where possible
|
||||||
|
static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) {
|
||||||
|
// make sure small is really the smaller value
|
||||||
|
if (small > big) {
|
||||||
|
NUMBER t = big;
|
||||||
|
big = small;
|
||||||
|
small = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isinf(small) == -1 || isinf(big) == -1)
|
||||||
|
return big;
|
||||||
|
|
||||||
|
NUMBER diff = big - small;
|
||||||
|
if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE))
|
||||||
|
return big;
|
||||||
|
|
||||||
|
// OK, so |y-x| < tol: we use the following identity then:
|
||||||
|
// we need to compute log10(10^x + 10^y)
|
||||||
|
// By Jacobian logarithm identity, this is equal to
|
||||||
|
// max(x,y) + log10(1+10^-abs(x-y))
|
||||||
|
// we compute the second term as a table lookup with integer quantization
|
||||||
|
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||||
|
int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding
|
||||||
|
return big + jacobianLogTable[ind];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class NUMBER>
|
||||||
|
struct Context : public ContextBase<NUMBER>
|
||||||
|
{};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct Context<double>
|
struct Context<double> : public ContextBase<double>
|
||||||
{
|
{
|
||||||
Context()
|
Context():ContextBase<double>()
|
||||||
{
|
{
|
||||||
for (int x = 0; x < 128; x++)
|
for (int x = 0; x < 128; x++)
|
||||||
ph2pr[x] = pow(10.0, -((double)x) / 10.0);
|
ph2pr[x] = pow(10.0, -((double)x) / 10.0);
|
||||||
|
|
||||||
INITIAL_CONSTANT = ldexp(1.0, 1020.0);
|
INITIAL_CONSTANT = ldexp(1.0, 1020.0);
|
||||||
LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
|
LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
|
||||||
RESULT_THRESHOLD = 0.0;
|
RESULT_THRESHOLD = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
double LOG10(double v){ return log10(v); }
|
double LOG10(double v){ return log10(v); }
|
||||||
|
inline double POW(double b, double e) { return pow(b,e); }
|
||||||
|
|
||||||
static double _(double n){ return n; }
|
static double _(double n){ return n; }
|
||||||
static double _(float n){ return ((double) n); }
|
static double _(float n){ return ((double) n); }
|
||||||
double ph2pr[128];
|
|
||||||
double INITIAL_CONSTANT;
|
|
||||||
double LOG10_INITIAL_CONSTANT;
|
|
||||||
double RESULT_THRESHOLD;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct Context<float>
|
struct Context<float> : public ContextBase<float>
|
||||||
{
|
{
|
||||||
Context()
|
Context() : ContextBase<float>()
|
||||||
{
|
{
|
||||||
for (int x = 0; x < 128; x++)
|
for (int x = 0; x < 128; x++)
|
||||||
{
|
{
|
||||||
ph2pr[x] = powf(10.f, -((float)x) / 10.f);
|
ph2pr[x] = powf(10.f, -((float)x) / 10.f);
|
||||||
}
|
}
|
||||||
|
|
||||||
INITIAL_CONSTANT = ldexpf(1.f, 120.f);
|
INITIAL_CONSTANT = ldexpf(1.f, 120.f);
|
||||||
LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
|
LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
|
||||||
RESULT_THRESHOLD = ldexpf(1.f, -110.f);
|
RESULT_THRESHOLD = ldexpf(1.f, -110.f);
|
||||||
}
|
}
|
||||||
|
|
||||||
float LOG10(float v){ return log10f(v); }
|
float LOG10(float v){ return log10f(v); }
|
||||||
|
inline float POW(float b, float e) { return powf(b,e); }
|
||||||
|
|
||||||
static float _(double n){ return ((float) n); }
|
static float _(double n){ return ((float) n); }
|
||||||
static float _(float n){ return n; }
|
static float _(float n){ return n; }
|
||||||
float ph2pr[128];
|
|
||||||
float INITIAL_CONSTANT;
|
|
||||||
float LOG10_INITIAL_CONSTANT;
|
|
||||||
float RESULT_THRESHOLD;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \
|
||||||
|
{ \
|
||||||
|
int minQual = delQual; \
|
||||||
|
int maxQual = insQual; \
|
||||||
|
if (insQual <= delQual) \
|
||||||
|
{ \
|
||||||
|
minQual = insQual; \
|
||||||
|
maxQual = delQual; \
|
||||||
|
} \
|
||||||
|
(output) = (MAX_QUAL < maxQual) ? \
|
||||||
|
((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \
|
||||||
|
: ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \
|
||||||
|
}
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,21 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "vector_defs.h"
|
#include "vector_defs.h"
|
||||||
#include "LoadTimeInitializer.h"
|
#include "LoadTimeInitializer.h"
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
//static members from ConvertChar
|
||||||
uint8_t ConvertChar::conversionTable[255];
|
uint8_t ConvertChar::conversionTable[255];
|
||||||
|
//Global function pointers in utils.h
|
||||||
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
||||||
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
|
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
|
||||||
|
//Static members in ContextBase
|
||||||
|
bool ContextBase<double>::staticMembersInitializedFlag = false;
|
||||||
|
double ContextBase<double>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||||
|
double ContextBase<double>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||||
|
bool ContextBase<float>::staticMembersInitializedFlag = false;
|
||||||
|
float ContextBase<float>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
|
||||||
|
float ContextBase<float>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
bool is_avx_supported()
|
bool is_avx_supported()
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Binary file not shown.
Loading…
Reference in New Issue