1. Added more profiling code

2. Modified JNI_README
This commit is contained in:
Karthik Gururaj 2014-01-27 14:32:44 -08:00
parent a14a11c0cf
commit 85a748860e
13 changed files with 115 additions and 47 deletions

View File

@ -1,33 +1,41 @@
Implementation overview:
Created a new Java class called JNILoglessPairHMM which extends LoglessPairHMM and
Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and
overrides functions from both LoglessPairHMM and PairHMM.
1. Constructor: Call base class constructors. Then, load the JNI library located in this
1. Constructor: Call base class constructors. Then, load the native library located in this
directory and call a global init function in the library to determine fields ids for the
members of classes JNIReadDataHolder and JNIHaplotypeDataHolders
2. initialize(): Override and do nothing as all matrix manipulation is done in the native
library.
3. computeLikelihoods(): Copies array references for readBases/quals etc and
haplotypeBases to arrays of JNIReadDataHolder and JNIHaplotypeDataHolders classes. Invokes
the JNI function to perform the computation and updates the likelihoodMap.
members of classes JNIReadDataHolder and JNIHaplotypeDataHolders.
2. When the library is loaded, it initializes two global function pointers to point to the
function implementation that is supported on the machine on which the program is being
run. The two pointers are for float and double respectively. This initialization is done
only once for the whole program.
3. initialize(): To initialized the region for PairHMM. Pass haplotype bases to native
code through the JNIHaplotypeDataHolders class. Since the haplotype list is common across
multiple samples in computeReadLikelihoods(), we can store the haplotype bases to the
native code once and re-use across multiple samples.
4. computeLikelihoods(): Copies array references for readBases/quals etc to array of
JNIReadDataHolder objects. Invokes the JNI function to perform the computation and
updates the likelihoodMap.
Note: Lots of debug code still left in the files, however, debug code is not executed
(hopefully, not even compiled into bytecode) because the debug flags are set to false.
Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java.
On the C++ side, the primary function called is
Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods. It
uses standard JNI calls to get and return data from/to the Java class JNILoglessPairHMM.
The last argument to the function is the maximum number of OpenMP threads to use while
computing PairHMM in C++. This option is set when the native function call is made from
JNILoglessPairHMM computeLikelihoods - currently it is set to 12 (no logical reason).
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It
uses standard JNI calls to get and return data from/to the Java class
VectorLoglessPairHMM. The last argument to the function is the maximum number of OpenMP
threads to use while computing PairHMM in C++. This option is set when the native function
call is made from JNILoglessPairHMM computeLikelihoods - currently it is set to 12 (no
logical reason).
Note: OpenMP has been disabled for now.
Compiling:
Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to
support all AVX intrinsics.
Type 'make'. This should create a library called libJNILoglessPairHMM.so .Comment out
OMPCFLAGS if OpenMP is not desired.
Type 'make'. This should create a library called libVectorLoglessPairHMM.so
Running:
If libJNILoglessPairHMM.so is compiled using icc, make sure that the Intel Composer XE
If libVectorLoglessPairHMM.so is compiled using icc, make sure that the Intel Composer XE
libraries are in your LD_LIBRARY_PATH :
source <COMPOSER_XE_DIR>/bin/compilervars.sh intel64
See run.sh in this directory on how to invoke HaplotypeCaller with the native library
See run.sh in this directory on how to invoke HaplotypeCaller with the native library. The
argument -Djava.library.path is needed if the native implementation is selected, else
unnecessary.

View File

@ -12,6 +12,7 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
m_sumNumHaplotypes = 0;
m_sumSquareNumHaplotypes = 0;
m_sumNumTestcases = 0;
m_sumNumDoubleTestcases = 0;
m_sumSquareNumTestcases = 0;
m_sumReadLengths = 0;
m_sumHaplotypeLengths = 0;
@ -19,8 +20,10 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
m_sumSquareProductReadLengthHaplotypeLength = 0;
m_maxNumTestcases = 0;
m_num_invocations = 0;
m_compute_time = 0;
m_data_transfer_time = 0;
m_bytes_copied = 0;
m_filename_to_fptr.clear();
@ -47,6 +50,8 @@ void LoadTimeInitializer::print_profiling()
mean_val = m_sumProductReadLengthHaplotypeLength/m_sumNumTestcases;
cout <<"productReadLengthHaplotypeLength\t"<<m_sumProductReadLengthHaplotypeLength<<"\t"<<m_sumSquareProductReadLengthHaplotypeLength<<"\t"
<<mean_val<<"\t"<<(m_sumSquareProductReadLengthHaplotypeLength/m_sumNumTestcases)-mean_val*mean_val<<"\n";
cout <<"numDoubleTestcases\t"<<m_sumNumDoubleTestcases<<"\n";
cout <<"numBytesCopied\t"<<m_bytes_copied<<"\n";
cout.flush();
}

View File

@ -23,15 +23,18 @@ class LoadTimeInitializer
double m_sumSquareNumHaplotypes;
double m_sumNumTestcases;
double m_sumSquareNumTestcases;
uint64_t m_sumNumDoubleTestcases;
double m_sumReadLengths;
double m_sumHaplotypeLengths;
double m_sumProductReadLengthHaplotypeLength;
double m_sumSquareProductReadLengthHaplotypeLength;
unsigned m_maxNumTestcases;
unsigned m_num_invocations;
//timing
//timing in nanoseconds
uint64_t m_compute_time;
uint64_t m_data_transfer_time;
//bytes copied
uint64_t m_bytes_copied;
private:
std::map<std::string, std::ofstream*> m_filename_to_fptr;
};

View File

@ -117,7 +117,7 @@ Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniSubCompute
assert(insertionGOPArray && "insertionGOP array not initialized in JNI");
assert(deletionGOPArray && "deletionGOP array not initialized in JNI");
assert(overallGCPArray && "OverallGCP array not initialized in JNI");
assert(readLength < MROWS);
//assert(readLength < MROWS);
#endif
testcase tc;
tc.rslen = readLength;

View File

@ -7,9 +7,15 @@
using namespace std;
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType
(JNIEnv* env, jobject thisObject)
{
return (jlong)get_machine_capabilities();
}
//Should be called only once for the whole Java process - initializes field ids for the classes JNIReadDataHolderClass
//and JNIHaplotypeDataHolderClass
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGlobalInit
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask
(JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass, jlong mask)
{
assert(readDataHolderClass);
@ -34,6 +40,12 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
fid = env->GetFieldID(haplotypeDataHolderClass, "haplotypeBases", "[B");
assert(fid && "JNI pairHMM: Could not get FID for haplotypeBases");
g_load_time_initializer.m_haplotypeBasesFID = fid;
if(mask != ENABLE_ALL_HARDWARE_FEATURES)
{
cout << "Using user supplied hardware mask to re-initialize function pointers for PairHMM\n";
initialize_function_pointers((uint64_t)mask);
cout.flush();
}
}
//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region,
@ -81,6 +93,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
#endif
#ifdef DO_PROFILING
g_load_time_initializer.m_sumHaplotypeLengths += haplotypeBasesLength;
g_load_time_initializer.m_bytes_copied += (is_copy ? haplotypeBasesLength : 0);
#endif
}
}
@ -139,6 +152,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy);
jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy);
jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
#ifdef DO_PROFILING
g_load_time_initializer.m_bytes_copied += (is_copy ? readLength*5 : 0);
#endif
#ifdef ENABLE_ASSERTIONS
assert(readBasesArray && "readBasesArray not initialized in JNI");
assert(readQualsArray && "readQualsArray not initialized in JNI");
@ -206,6 +222,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
#endif
#ifdef DO_PROFILING
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
start_time = get_time();
#endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
@ -216,6 +233,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
#ifdef DO_PROFILING
++(g_load_time_initializer.m_sumNumDoubleTestcases);
#endif
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));

View File

@ -43,10 +43,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
/*
* Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
* Method: jniGlobalInit
* Method: jniInitializeClassFieldsAndMachineMask
* Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGlobalInit
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask
(JNIEnv *, jobject, jclass, jclass, jlong);
/*

View File

@ -10,8 +10,8 @@ fi
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI/ -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
-R /data/broad/samples/joint_variant_calling/broad_reference/human_g1k_v37_decoy.fasta \
-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
-I /data/simulated/sim1M_pairs_final.bam \
-stand_call_conf 50.0 \
-stand_emit_conf 10.0 \
--pair_hmm_implementation $pair_hmm_implementation \

View File

@ -45,7 +45,7 @@ uint64_t get_machine_capabilities()
void initialize_function_pointers(uint64_t mask)
{
//if(false)
//mask = 0;
if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX)))
{
cout << "Using AVX accelerated implementation of PairHMM\n";

View File

@ -25,14 +25,16 @@ extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_lo
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline);
template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
void initialize_function_pointers(uint64_t mask=0xFFFFFFFFFFFFFFFFull);
double getCurrClk();
uint64_t get_time(struct timespec* x=0);
//bit 0 is sse4.2, bit 1 is AVX
enum ProcessorCapabilitiesEnum
{
SSE42_CUSTOM_IDX=0,
AVX_CUSTOM_IDX
};
#define ENABLE_ALL_HARDWARE_FEATURES 0xFFFFFFFFFFFFFFFFull
uint64_t get_machine_capabilities();
void initialize_function_pointers(uint64_t mask=ENABLE_ALL_HARDWARE_FEATURES);
#endif

View File

@ -277,7 +277,7 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
}
++numComputeLikelihoodCalls;
//if(numComputeLikelihoodCalls == 5)
//jniPairHMM.jniClose();
//jniPairHMM.close();
//System.exit(0);
return likelihoodMap;
}

View File

@ -59,4 +59,5 @@ import java.util.HashMap;
public abstract class JNILoglessPairHMM extends LoglessPairHMM {
public abstract HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap();
protected long setupTime = 0;
}

View File

@ -86,37 +86,45 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
public byte[] haplotypeBases = null;
}
/**
* Return 64-bit mask representing machine capabilities
* Bit 0 is LSB, bit 63 MSB
* Bit 0 represents sse4.2 availability
* Bit 1 represents AVX availability
*/
public native long jniGetMachineType();
private native void jniClose();
private native void jniGlobalInit(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);
/**
* Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
* C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
* change per JVM session
* @param readDataHolderClass class type of JNIReadDataHolderClass
* @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
* @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
* */
private native void jniInitializeClassFieldsAndMachineMask(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);
private static boolean isVectorLoglessPairHMMLibraryLoaded = false;
//The constructor is called only once inside PairHMMLikelihoodCalculationEngine
public VectorLoglessPairHMM() {
super();
//Load the library and initialize the FieldIDs
if(!isVectorLoglessPairHMMLibraryLoaded) {
System.loadLibrary("VectorLoglessPairHMM");
isVectorLoglessPairHMMLibraryLoaded = true;
jniGlobalInit(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
}
}
@Override
public void close()
{
System.out.println("Time spent in setup for JNI call : "+setupTime);
super.close();
jniClose();
}
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
//Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
//Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<Haplotype,Integer>();
@Override
public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; }
//Used to transfer data to JNI
//Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
/**
* {@inheritDoc}
*/
@ -136,10 +144,12 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
}
jniInitializeHaplotypes(numHaplotypes, haplotypeDataArray);
}
/**
* Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
* accessing Java memory directly, still important to release memory from C++
*/
private native void jniFinalizeRegion();
//Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
//accessing Java memory directly, still important to release memory from C++
/**
* {@inheritDoc}
*/
@ -149,7 +159,9 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
jniFinalizeRegion();
}
//Real compute kernel
/**
* Real compute kernel
*/
private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
/**
@ -203,4 +215,16 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
computeTime += (System.nanoTime() - startTime);
return likelihoodMap;
}
/**
* Print final profiling information from native code
*/
public native void jniClose();
@Override
public void close()
{
System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9));
super.close();
jniClose();
}
}

View File

@ -312,11 +312,16 @@ public abstract class PairHMM {
return Math.min(haplotype1.length, haplotype2.length);
}
/**
* Return the results of the computeLikelihoods function
*/
public double[] getLikelihoodArray() { return mLikelihoodArray; }
//Called at the end of all HC calls
/**
* Called at the end of the program to close files, print profiling information etc
*/
public void close()
{
if(doProfiling)
System.out.println("Total compute time in PairHMM computeLikelihoods() : "+computeTime);
System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(computeTime*1e-9));
}
}