diff --git a/PairHMM_JNI/JNI_README b/PairHMM_JNI/JNI_README index 9fc3aa8d1..3eb797ab6 100644 --- a/PairHMM_JNI/JNI_README +++ b/PairHMM_JNI/JNI_README @@ -1,33 +1,41 @@ Implementation overview: -Created a new Java class called JNILoglessPairHMM which extends LoglessPairHMM and +Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and overrides functions from both LoglessPairHMM and PairHMM. -1. Constructor: Call base class constructors. Then, load the JNI library located in this +1. Constructor: Call base class constructors. Then, load the native library located in this directory and call a global init function in the library to determine fields ids for the -members of classes JNIReadDataHolder and JNIHaplotypeDataHolders -2. initialize(): Override and do nothing as all matrix manipulation is done in the native -library. -3. computeLikelihoods(): Copies array references for readBases/quals etc and -haplotypeBases to arrays of JNIReadDataHolder and JNIHaplotypeDataHolders classes. Invokes -the JNI function to perform the computation and updates the likelihoodMap. +members of classes JNIReadDataHolder and JNIHaplotypeDataHolders. +2. When the library is loaded, it initializes two global function pointers to point to the +function implementation that is supported on the machine on which the program is being +run. The two pointers are for float and double respectively. This initialization is done +only once for the whole program. +3. initialize(): To initialized the region for PairHMM. Pass haplotype bases to native +code through the JNIHaplotypeDataHolders class. Since the haplotype list is common across +multiple samples in computeReadLikelihoods(), we can store the haplotype bases to the +native code once and re-use across multiple samples. +4. computeLikelihoods(): Copies array references for readBases/quals etc to array of +JNIReadDataHolder objects. Invokes the JNI function to perform the computation and +updates the likelihoodMap. -Note: Lots of debug code still left in the files, however, debug code is not executed -(hopefully, not even compiled into bytecode) because the debug flags are set to false. +Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java. On the C++ side, the primary function called is -Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods. It -uses standard JNI calls to get and return data from/to the Java class JNILoglessPairHMM. -The last argument to the function is the maximum number of OpenMP threads to use while -computing PairHMM in C++. This option is set when the native function call is made from -JNILoglessPairHMM computeLikelihoods - currently it is set to 12 (no logical reason). +Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It +uses standard JNI calls to get and return data from/to the Java class +VectorLoglessPairHMM. The last argument to the function is the maximum number of OpenMP +threads to use while computing PairHMM in C++. This option is set when the native function +call is made from JNILoglessPairHMM computeLikelihoods - currently it is set to 12 (no +logical reason). +Note: OpenMP has been disabled for now. Compiling: Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX intrinsics. -Type 'make'. This should create a library called libJNILoglessPairHMM.so .Comment out -OMPCFLAGS if OpenMP is not desired. +Type 'make'. This should create a library called libVectorLoglessPairHMM.so Running: -If libJNILoglessPairHMM.so is compiled using icc, make sure that the Intel Composer XE +If libVectorLoglessPairHMM.so is compiled using icc, make sure that the Intel Composer XE libraries are in your LD_LIBRARY_PATH : source /bin/compilervars.sh intel64 -See run.sh in this directory on how to invoke HaplotypeCaller with the native library +See run.sh in this directory on how to invoke HaplotypeCaller with the native library. The +argument -Djava.library.path is needed if the native implementation is selected, else +unnecessary. diff --git a/PairHMM_JNI/LoadTimeInitializer.cc b/PairHMM_JNI/LoadTimeInitializer.cc index ae545d881..4188ea3fb 100644 --- a/PairHMM_JNI/LoadTimeInitializer.cc +++ b/PairHMM_JNI/LoadTimeInitializer.cc @@ -12,6 +12,7 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa m_sumNumHaplotypes = 0; m_sumSquareNumHaplotypes = 0; m_sumNumTestcases = 0; + m_sumNumDoubleTestcases = 0; m_sumSquareNumTestcases = 0; m_sumReadLengths = 0; m_sumHaplotypeLengths = 0; @@ -19,8 +20,10 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa m_sumSquareProductReadLengthHaplotypeLength = 0; m_maxNumTestcases = 0; m_num_invocations = 0; + m_compute_time = 0; m_data_transfer_time = 0; + m_bytes_copied = 0; m_filename_to_fptr.clear(); @@ -47,6 +50,8 @@ void LoadTimeInitializer::print_profiling() mean_val = m_sumProductReadLengthHaplotypeLength/m_sumNumTestcases; cout <<"productReadLengthHaplotypeLength\t"< m_filename_to_fptr; }; diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc index 30f118c31..98ebcde92 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc @@ -117,7 +117,7 @@ Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniSubCompute assert(insertionGOPArray && "insertionGOP array not initialized in JNI"); assert(deletionGOPArray && "deletionGOP array not initialized in JNI"); assert(overallGCPArray && "OverallGCP array not initialized in JNI"); - assert(readLength < MROWS); + //assert(readLength < MROWS); #endif testcase tc; tc.rslen = readLength; diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc index 5e77403f4..e837e5765 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc @@ -7,9 +7,15 @@ using namespace std; +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType + (JNIEnv* env, jobject thisObject) +{ + return (jlong)get_machine_capabilities(); +} + //Should be called only once for the whole Java process - initializes field ids for the classes JNIReadDataHolderClass //and JNIHaplotypeDataHolderClass -JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGlobalInit +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask (JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass, jlong mask) { assert(readDataHolderClass); @@ -34,6 +40,12 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless fid = env->GetFieldID(haplotypeDataHolderClass, "haplotypeBases", "[B"); assert(fid && "JNI pairHMM: Could not get FID for haplotypeBases"); g_load_time_initializer.m_haplotypeBasesFID = fid; + if(mask != ENABLE_ALL_HARDWARE_FEATURES) + { + cout << "Using user supplied hardware mask to re-initialize function pointers for PairHMM\n"; + initialize_function_pointers((uint64_t)mask); + cout.flush(); + } } //Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region, @@ -81,6 +93,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless #endif #ifdef DO_PROFILING g_load_time_initializer.m_sumHaplotypeLengths += haplotypeBasesLength; + g_load_time_initializer.m_bytes_copied += (is_copy ? haplotypeBasesLength : 0); #endif } } @@ -139,6 +152,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy); jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy); jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy); +#ifdef DO_PROFILING + g_load_time_initializer.m_bytes_copied += (is_copy ? readLength*5 : 0); +#endif #ifdef ENABLE_ASSERTIONS assert(readBasesArray && "readBasesArray not initialized in JNI"); assert(readQualsArray && "readQualsArray not initialized in JNI"); @@ -206,6 +222,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless assert(env->GetArrayLength(likelihoodArray) == numTestCases); #endif #ifdef DO_PROFILING + g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0); start_time = get_time(); #endif #pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse) @@ -216,6 +233,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless if (result_avxf < MIN_ACCEPTED) { double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0); result = log10(result_avxd) - log10(ldexp(1.0, 1020.0)); +#ifdef DO_PROFILING + ++(g_load_time_initializer.m_sumNumDoubleTestcases); +#endif } else result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f))); diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h index 099751b79..3b07665f6 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h @@ -43,10 +43,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless /* * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM - * Method: jniGlobalInit + * Method: jniInitializeClassFieldsAndMachineMask * Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V */ -JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGlobalInit +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask (JNIEnv *, jobject, jclass, jclass, jlong); /* diff --git a/PairHMM_JNI/run.sh b/PairHMM_JNI/run.sh index a1eacdefa..e66ce543f 100755 --- a/PairHMM_JNI/run.sh +++ b/PairHMM_JNI/run.sh @@ -10,8 +10,8 @@ fi #-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI/ -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \ --dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \ --R /data/broad/samples/joint_variant_calling/broad_reference/human_g1k_v37_decoy.fasta \ --I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \ +-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \ +-I /data/simulated/sim1M_pairs_final.bam \ -stand_call_conf 50.0 \ -stand_emit_conf 10.0 \ --pair_hmm_implementation $pair_hmm_implementation \ diff --git a/PairHMM_JNI/utils.cc b/PairHMM_JNI/utils.cc index 8eb417d94..dcc7835df 100644 --- a/PairHMM_JNI/utils.cc +++ b/PairHMM_JNI/utils.cc @@ -45,7 +45,7 @@ uint64_t get_machine_capabilities() void initialize_function_pointers(uint64_t mask) { - //if(false) + //mask = 0; if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX))) { cout << "Using AVX accelerated implementation of PairHMM\n"; diff --git a/PairHMM_JNI/utils.h b/PairHMM_JNI/utils.h index a09c0204c..783d75ae5 100644 --- a/PairHMM_JNI/utils.h +++ b/PairHMM_JNI/utils.h @@ -25,14 +25,16 @@ extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_lo void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline); template NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); -void initialize_function_pointers(uint64_t mask=0xFFFFFFFFFFFFFFFFull); double getCurrClk(); uint64_t get_time(struct timespec* x=0); +//bit 0 is sse4.2, bit 1 is AVX enum ProcessorCapabilitiesEnum { SSE42_CUSTOM_IDX=0, AVX_CUSTOM_IDX }; +#define ENABLE_ALL_HARDWARE_FEATURES 0xFFFFFFFFFFFFFFFFull uint64_t get_machine_capabilities(); +void initialize_function_pointers(uint64_t mask=ENABLE_ALL_HARDWARE_FEATURES); #endif diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java index 55491bcd8..7e50f0c30 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java @@ -277,7 +277,7 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM { } ++numComputeLikelihoodCalls; //if(numComputeLikelihoodCalls == 5) - //jniPairHMM.jniClose(); + //jniPairHMM.close(); //System.exit(0); return likelihoodMap; } diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java index 9b0772941..f039cc295 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java @@ -59,4 +59,5 @@ import java.util.HashMap; public abstract class JNILoglessPairHMM extends LoglessPairHMM { public abstract HashMap getHaplotypeToHaplotypeListIdxMap(); protected long setupTime = 0; + } diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java index 6207c9679..192ebacc1 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java @@ -86,37 +86,45 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { public byte[] haplotypeBases = null; } + /** + * Return 64-bit mask representing machine capabilities + * Bit 0 is LSB, bit 63 MSB + * Bit 0 represents sse4.2 availability + * Bit 1 represents AVX availability + */ public native long jniGetMachineType(); - private native void jniClose(); - private native void jniGlobalInit(Class readDataHolderClass, Class haplotypeDataHolderClass, long mask); + + /** + * Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM. + * C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not + * change per JVM session + * @param readDataHolderClass class type of JNIReadDataHolderClass + * @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass + * @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask + * */ + private native void jniInitializeClassFieldsAndMachineMask(Class readDataHolderClass, Class haplotypeDataHolderClass, long mask); private static boolean isVectorLoglessPairHMMLibraryLoaded = false; + //The constructor is called only once inside PairHMMLikelihoodCalculationEngine public VectorLoglessPairHMM() { super(); + //Load the library and initialize the FieldIDs if(!isVectorLoglessPairHMMLibraryLoaded) { System.loadLibrary("VectorLoglessPairHMM"); isVectorLoglessPairHMMLibraryLoaded = true; - jniGlobalInit(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once + jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once } } - @Override - public void close() - { - System.out.println("Time spent in setup for JNI call : "+setupTime); - super.close(); - jniClose(); - } - private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray); //Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize //Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype private HashMap haplotypeToHaplotypeListIdxMap = new HashMap(); @Override public HashMap getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; } + //Used to transfer data to JNI //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region - /** * {@inheritDoc} */ @@ -136,10 +144,12 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { } jniInitializeHaplotypes(numHaplotypes, haplotypeDataArray); } - + /** + * Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not + * accessing Java memory directly, still important to release memory from C++ + */ private native void jniFinalizeRegion(); - //Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not - //accessing Java memory directly, still important to release memory from C++ + /** * {@inheritDoc} */ @@ -149,7 +159,9 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { jniFinalizeRegion(); } - //Real compute kernel + /** + * Real compute kernel + */ private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray, JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse); /** @@ -203,4 +215,16 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM { computeTime += (System.nanoTime() - startTime); return likelihoodMap; } + + /** + * Print final profiling information from native code + */ + public native void jniClose(); + @Override + public void close() + { + System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9)); + super.close(); + jniClose(); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 4ab17d5f3..254945af4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -312,11 +312,16 @@ public abstract class PairHMM { return Math.min(haplotype1.length, haplotype2.length); } + /** + * Return the results of the computeLikelihoods function + */ public double[] getLikelihoodArray() { return mLikelihoodArray; } - //Called at the end of all HC calls + /** + * Called at the end of the program to close files, print profiling information etc + */ public void close() { if(doProfiling) - System.out.println("Total compute time in PairHMM computeLikelihoods() : "+computeTime); + System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(computeTime*1e-9)); } }