1. Inserted #define in sandbox pairhmm-template-main.cc
2. Wrapped _mm_empty() with ifdef SIMD_TYPE_SSE 3. OpenMP disabled 4. Added code for initializing PairHMM's data inside initializePairHMM - not used yet
This commit is contained in:
parent
28891117e2
commit
88c08e78e7
Binary file not shown.
|
|
@ -58,47 +58,53 @@ int main(int argc, char** argv)
|
|||
vector<testcase> tc_vector;
|
||||
tc_vector.clear();
|
||||
testcase tc;
|
||||
double total_time = 0;
|
||||
while(1)
|
||||
{
|
||||
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
||||
if(break_value >= 0)
|
||||
tc_vector.push_back(tc);
|
||||
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
|
||||
{
|
||||
vector<double> results_vec;
|
||||
results_vec.clear();
|
||||
results_vec.resize(tc_vector.size());
|
||||
double start_time = getCurrClk();
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
|
||||
results_vec[i] = result;
|
||||
}
|
||||
total_time += (getCurrClk()-start_time);
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
double baseline_result = compute_full_prob<double>(&tc);
|
||||
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
|
||||
double abs_error = fabs(baseline_result-results_vec[i]);
|
||||
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
|
||||
if(abs_error > 1e-5 && rel_error > 1e-5)
|
||||
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
|
||||
delete tc_vector[i].rs;
|
||||
delete tc_vector[i].hap;
|
||||
}
|
||||
results_vec.clear();
|
||||
tc_vector.clear();
|
||||
}
|
||||
if(break_value < 0)
|
||||
break;
|
||||
tc_vector.push_back(tc);
|
||||
}
|
||||
vector<double> results_vec;
|
||||
results_vec.clear();
|
||||
results_vec.resize(tc_vector.size());
|
||||
double start_time = getCurrClk();
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
|
||||
results_vec[i] = result;
|
||||
}
|
||||
cout << "Time taken "<<getCurrClk()-start_time << "\n";
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
double baseline_result = compute_full_prob<double>(&tc);
|
||||
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
|
||||
double abs_error = fabs(baseline_result-results_vec[i]);
|
||||
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
|
||||
if(abs_error > 1e-5 && rel_error > 1e-5)
|
||||
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
|
||||
delete tc_vector[i].rs;
|
||||
delete tc_vector[i].hap;
|
||||
}
|
||||
results_vec.clear();
|
||||
tc_vector.clear();
|
||||
cout << "Total time "<< total_time << "\n";
|
||||
if(use_old_read_testcase)
|
||||
fclose(fptr);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -112,8 +112,10 @@ inline void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec, SIMD_TYPE), PRECISION) (
|
|||
|
||||
VEC_SHIFT_LEFT_1BIT(currMaskVecLow.vec) ;
|
||||
VEC_SHIFT_LEFT_1BIT(currMaskVecHigh.vec) ;
|
||||
|
||||
|
||||
#ifdef SIMD_TYPE_SSE
|
||||
_mm_empty();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,11 @@
|
|||
#include "template.h"
|
||||
#include "vector_defs.h"
|
||||
|
||||
#define SIMD_TYPE avx
|
||||
#define SIMD_TYPE_AVX
|
||||
|
||||
#define BATCH_SIZE 10
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
||||
int thread_level_parallelism_enabled = false ;
|
||||
|
|
@ -53,7 +56,7 @@ int main()
|
|||
exit(0);
|
||||
*/
|
||||
|
||||
testcase tc[BATCH_SIZE];
|
||||
testcase* tc = new testcase[BATCH_SIZE];
|
||||
float result[BATCH_SIZE], result_avxf;
|
||||
double result_avxd;
|
||||
//struct timeval start, end;
|
||||
|
|
@ -120,14 +123,15 @@ int main()
|
|||
|
||||
//gettimeofday(&start, NULL);
|
||||
lastClk = getCurrClk() ;
|
||||
for (int b=0;b<read_count;b++)
|
||||
printf("%E\n", result[b]);
|
||||
//for (int b=0;b<read_count;b++)
|
||||
//printf("%E\n", result[b]);
|
||||
//gettimeofday(&end, NULL);
|
||||
aggregateTimeWrite += (getCurrClk() - lastClk) ;
|
||||
//((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
}
|
||||
|
||||
delete tc;
|
||||
printf("AVX Read Time: %.2f\n", aggregateTimeRead);
|
||||
printf("AVX Compute Time: %.2f\n", aggregateTimeCompute);
|
||||
printf("AVX Write Time: %.2f\n", aggregateTimeWrite);
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ bool is_sse42_supported()
|
|||
|
||||
void initialize_function_pointers()
|
||||
{
|
||||
//if(false)
|
||||
if(is_avx_supported())
|
||||
{
|
||||
cout << "Using AVX accelerated implementation of PairHMM\n";
|
||||
|
|
|
|||
|
|
@ -23,4 +23,7 @@
|
|||
#include "define-sse-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#undef SIMD_TYPE
|
||||
#undef SIMD_TYPE_AVX
|
||||
#undef SIMD_TYPE_SSE
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +1,18 @@
|
|||
void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
|
||||
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
|
||||
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
|
||||
template<class NUMBER> inline void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
|
||||
template<class NUMBER> inline void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
|
||||
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
|
||||
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
|
||||
_256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
inline _256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
_256_TYPE distm, _256_TYPE _1_distm);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel);
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIMD_TYPE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);
|
||||
|
|
|
|||
|
|
@ -332,7 +332,7 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
}
|
||||
|
||||
// initialize arrays to hold the probabilities of being in the match, insertion and deletion cases
|
||||
pairHMMThreadLocal.get().initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
||||
pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.variant.variantcontext.Allele;
|
|||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -147,6 +148,52 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
jniInitialize(readMaxLength, haplotypeMaxLength);
|
||||
}
|
||||
|
||||
private native void jniInitialize(final int numHaplotypes, final int numTotalReads, JNIHaplotypeDataHolderClass[] haplotypeDataArray,
|
||||
JNIReadDataHolderClass[] readDataArray);
|
||||
HashMap<String, Integer> sampleToIndex = new HashMap<String, Integer>();
|
||||
//Used to transfer data to JNI
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
if(verify)
|
||||
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||
/*
|
||||
int numHaplotypes = haplotypes.size();
|
||||
JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
|
||||
int idx = 0;
|
||||
for(final Haplotype currHaplotype : haplotypes)
|
||||
{
|
||||
haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
|
||||
haplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
|
||||
++idx;
|
||||
}
|
||||
sampleToIndex.clear();
|
||||
int totalNumReads = 0;
|
||||
for(final Map.Entry<String, List<GATKSAMRecord>> currEntry : perSampleReadList)
|
||||
{
|
||||
sampleToIndex.put(currEntry.getKey(), totalNumReads);
|
||||
totalNumReads += currEntry.getValue().size();
|
||||
}
|
||||
|
||||
JNIHaplotypeDataHolderClass[] readDataArray = new JNIReadDataHolderClass[totalNumReads];
|
||||
idx = 0;
|
||||
for(final Map.Entry<String, List<GATKSAMRecord>> currEntry : perSampleReadList)
|
||||
{
|
||||
for(GATKSAMRecord read : currEntry.getValue())
|
||||
{
|
||||
readDataArray[idx] = new JNIReadDataHolderClass();
|
||||
readDataArray[idx].readBases = read.getReadBases();
|
||||
readDataArray[idx].readQuals = read.getBaseQualities();
|
||||
readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
|
||||
readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
|
||||
readDataArray[idx].overallGCP = GCPArrayMap.get(read);
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
jniInitialize(numHaplotypes, numTotalReads, haplotypeDataArray, readDataArray);*/
|
||||
}
|
||||
|
||||
//Real compute kernel
|
||||
private native void jniComputeLikelihoods(int numReads, int numHaplotypes,
|
||||
|
|
|
|||
|
|
@ -100,6 +100,18 @@ public abstract class PairHMM {
|
|||
initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths
|
||||
* This function is used by the JNI implementations to transfer all data once to the native code
|
||||
* @param haplotypes the list of haplotypes
|
||||
* @param perSampleReadList map from sample name to list of reads
|
||||
* @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM
|
||||
* @param readMaxLength the max length of reads we want to use with this PairHMM
|
||||
*/
|
||||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
initialize(readMaxLength, haplotypeMaxLength);
|
||||
}
|
||||
|
||||
protected int findMaxReadLength(final List<GATKSAMRecord> reads) {
|
||||
int listMaxReadLength = 0;
|
||||
for(GATKSAMRecord read : reads){
|
||||
|
|
|
|||
Loading…
Reference in New Issue