1. Inserted #define in sandbox pairhmm-template-main.cc

2. Wrapped _mm_empty() with ifdef SIMD_TYPE_SSE
3. OpenMP disabled
4. Added code for initializing PairHMM's data inside initializePairHMM -
not used yet
This commit is contained in:
Karthik Gururaj 2014-01-21 09:57:14 -08:00
parent 28891117e2
commit 88c08e78e7
10 changed files with 126 additions and 51 deletions

Binary file not shown.

View File

@ -58,47 +58,53 @@ int main(int argc, char** argv)
vector<testcase> tc_vector;
tc_vector.clear();
testcase tc;
double total_time = 0;
while(1)
{
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
if(break_value >= 0)
tc_vector.push_back(tc);
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
{
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
double start_time = getCurrClk();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
results_vec[i] = result;
}
total_time += (getCurrClk()-start_time);
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
double baseline_result = compute_full_prob<double>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
double abs_error = fabs(baseline_result-results_vec[i]);
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
if(abs_error > 1e-5 && rel_error > 1e-5)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
delete tc_vector[i].rs;
delete tc_vector[i].hap;
}
results_vec.clear();
tc_vector.clear();
}
if(break_value < 0)
break;
tc_vector.push_back(tc);
}
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
double start_time = getCurrClk();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
results_vec[i] = result;
}
cout << "Time taken "<<getCurrClk()-start_time << "\n";
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
double baseline_result = compute_full_prob<double>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
double abs_error = fabs(baseline_result-results_vec[i]);
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
if(abs_error > 1e-5 && rel_error > 1e-5)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
delete tc_vector[i].rs;
delete tc_vector[i].hap;
}
results_vec.clear();
tc_vector.clear();
cout << "Total time "<< total_time << "\n";
if(use_old_read_testcase)
fclose(fptr);
else

View File

@ -112,8 +112,10 @@ inline void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec, SIMD_TYPE), PRECISION) (
VEC_SHIFT_LEFT_1BIT(currMaskVecLow.vec) ;
VEC_SHIFT_LEFT_1BIT(currMaskVecHigh.vec) ;
#ifdef SIMD_TYPE_SSE
_mm_empty();
#endif
}

View File

@ -3,8 +3,11 @@
#include "template.h"
#include "vector_defs.h"
#define SIMD_TYPE avx
#define SIMD_TYPE_AVX
#define BATCH_SIZE 10
#define BATCH_SIZE 10000
#define RUN_HYBRID
int thread_level_parallelism_enabled = false ;
@ -53,7 +56,7 @@ int main()
exit(0);
*/
testcase tc[BATCH_SIZE];
testcase* tc = new testcase[BATCH_SIZE];
float result[BATCH_SIZE], result_avxf;
double result_avxd;
//struct timeval start, end;
@ -120,14 +123,15 @@ int main()
//gettimeofday(&start, NULL);
lastClk = getCurrClk() ;
for (int b=0;b<read_count;b++)
printf("%E\n", result[b]);
//for (int b=0;b<read_count;b++)
//printf("%E\n", result[b]);
//gettimeofday(&end, NULL);
aggregateTimeWrite += (getCurrClk() - lastClk) ;
//((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
}
delete tc;
printf("AVX Read Time: %.2f\n", aggregateTimeRead);
printf("AVX Compute Time: %.2f\n", aggregateTimeCompute);
printf("AVX Write Time: %.2f\n", aggregateTimeWrite);

View File

@ -35,6 +35,7 @@ bool is_sse42_supported()
void initialize_function_pointers()
{
//if(false)
if(is_avx_supported())
{
cout << "Using AVX accelerated implementation of PairHMM\n";

View File

@ -23,4 +23,7 @@
#include "define-sse-double.h"
#include "vector_function_prototypes.h"
#undef SIMD_TYPE
#undef SIMD_TYPE_AVX
#undef SIMD_TYPE_SSE

View File

@ -1,18 +1,18 @@
void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
inline void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
inline void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
inline void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
inline void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
template<class NUMBER> inline void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
template<class NUMBER> inline void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
_256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
inline _256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
_256_TYPE distm, _256_TYPE _1_distm);
void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
inline void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel);
template<class NUMBER> NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIMD_TYPE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);

View File

@ -332,7 +332,7 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
}
// initialize arrays to hold the probabilities of being in the match, insertion and deletion cases
pairHMMThreadLocal.get().initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH);
}

View File

@ -57,6 +57,7 @@ import org.broadinstitute.variant.variantcontext.Allele;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
/**
@ -147,6 +148,52 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
jniInitialize(readMaxLength, haplotypeMaxLength);
}
private native void jniInitialize(final int numHaplotypes, final int numTotalReads, JNIHaplotypeDataHolderClass[] haplotypeDataArray,
JNIReadDataHolderClass[] readDataArray);
HashMap<String, Integer> sampleToIndex = new HashMap<String, Integer>();
//Used to transfer data to JNI
/**
* {@inheritDoc}
*/
@Override
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) {
if(verify)
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
/*
int numHaplotypes = haplotypes.size();
JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
int idx = 0;
for(final Haplotype currHaplotype : haplotypes)
{
haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
haplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
++idx;
}
sampleToIndex.clear();
int totalNumReads = 0;
for(final Map.Entry<String, List<GATKSAMRecord>> currEntry : perSampleReadList)
{
sampleToIndex.put(currEntry.getKey(), totalNumReads);
totalNumReads += currEntry.getValue().size();
}
JNIHaplotypeDataHolderClass[] readDataArray = new JNIReadDataHolderClass[totalNumReads];
idx = 0;
for(final Map.Entry<String, List<GATKSAMRecord>> currEntry : perSampleReadList)
{
for(GATKSAMRecord read : currEntry.getValue())
{
readDataArray[idx] = new JNIReadDataHolderClass();
readDataArray[idx].readBases = read.getReadBases();
readDataArray[idx].readQuals = read.getBaseQualities();
readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
readDataArray[idx].overallGCP = GCPArrayMap.get(read);
++idx;
}
}
jniInitialize(numHaplotypes, numTotalReads, haplotypeDataArray, readDataArray);*/
}
//Real compute kernel
private native void jniComputeLikelihoods(int numReads, int numHaplotypes,

View File

@ -100,6 +100,18 @@ public abstract class PairHMM {
initialized = true;
}
/**
* Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths
* This function is used by the JNI implementations to transfer all data once to the native code
* @param haplotypes the list of haplotypes
* @param perSampleReadList map from sample name to list of reads
* @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM
* @param readMaxLength the max length of reads we want to use with this PairHMM
*/
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) {
initialize(readMaxLength, haplotypeMaxLength);
}
protected int findMaxReadLength(final List<GATKSAMRecord> reads) {
int listMaxReadLength = 0;
for(GATKSAMRecord read : reads){