1. Added the catch UnsatisfiedLinkError exception in

PairHMMLikelihoodCalculationEngine.java to fall back to LOGLESS_CACHING
in case the native library could not be loaded. Made
VECTOR_LOGLESS_CACHING as the default implementation.
2. Updated the README with Mauricio's comments
3. baseline.cc is used within the library - if the machine supports
neither AVX nor SSE4.1, the native library falls back to un-vectorized
C++ in baseline.cc.
4. pairhmm-1-base.cc: This is not part of the library, but is being
heavily used for debugging/profiling. Can I request that we keep it
there for now? In the next release, we can delete it from the
repository.
5. I agree with Mauricio about the ifdefs. I am sure you already know,
but just to reassure you the debug code is not compiled into the library
(because of the ifdefs) and will not affect performance.
This commit is contained in:
Karthik Gururaj 2014-02-28 08:59:55 -08:00
parent 2d0ce45bb0
commit 37526dfad5
5 changed files with 106 additions and 69 deletions

View File

@ -426,7 +426,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
*/ */
@Hidden @Hidden
@Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false) @Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false)
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING; public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING;
@Hidden @Hidden
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)

View File

@ -81,29 +81,37 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
private final boolean noFpga; private final boolean noFpga;
private final ThreadLocal<PairHMM> pairHMMThreadLocal = new ThreadLocal<PairHMM>() { private final ThreadLocal<PairHMM> pairHMMThreadLocal = new ThreadLocal<PairHMM>() {
@Override @Override
protected PairHMM initialValue() { protected PairHMM initialValue() {
switch (hmmType) { switch (hmmType) {
case EXACT: return new Log10PairHMM(true); case EXACT: return new Log10PairHMM(true);
case ORIGINAL: return new Log10PairHMM(false); case ORIGINAL: return new Log10PairHMM(false);
case LOGLESS_CACHING: case LOGLESS_CACHING:
if (noFpga || !CnyPairHMM.isAvailable()) if (noFpga || !CnyPairHMM.isAvailable())
return new LoglessPairHMM(); return new LoglessPairHMM();
else else
return new CnyPairHMM(); return new CnyPairHMM();
case VECTOR_LOGLESS_CACHING: case VECTOR_LOGLESS_CACHING:
return new VectorLoglessPairHMM(); try
case DEBUG_VECTOR_LOGLESS_CACHING: {
return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING); return new VectorLoglessPairHMM();
case ARRAY_LOGLESS: }
if (noFpga || !CnyPairHMM.isAvailable()) catch(UnsatisfiedLinkError ule)
return new ArrayLoglessPairHMM(); {
else logger.info("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
return new CnyPairHMM(); return new LoglessPairHMM();
default: }
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, LOGLESS_CACHING, and ARRAY_LOGLESS."); case DEBUG_VECTOR_LOGLESS_CACHING:
} return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING);
} case ARRAY_LOGLESS:
if (noFpga || !CnyPairHMM.isAvailable())
return new ArrayLoglessPairHMM();
else
return new CnyPairHMM();
default:
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, LOGLESS_CACHING, and ARRAY_LOGLESS.");
}
}
}; };
// Attempted to do as below, to avoid calling pairHMMThreadLocal.get() later on, but it resulted in a NullPointerException // Attempted to do as below, to avoid calling pairHMMThreadLocal.get() later on, but it resulted in a NullPointerException
// private final PairHMM pairHMM = pairHMMThreadLocal.get(); // private final PairHMM pairHMM = pairHMMThreadLocal.get();
@ -340,7 +348,7 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
private void finalizePairHMM() private void finalizePairHMM()
{ {
pairHMMThreadLocal.get().finalizeRegion(); pairHMMThreadLocal.get().finalizeRegion();
} }
@ -360,9 +368,9 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE); map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
stratifiedReadMap.put(sampleEntry.getKey(), map); stratifiedReadMap.put(sampleEntry.getKey(), map);
} }
//Used mostly by the JNI implementation(s) to free arrays //Used mostly by the JNI implementation(s) to free arrays
finalizePairHMM(); finalizePairHMM();
return stratifiedReadMap; return stratifiedReadMap;
} }

View File

@ -97,8 +97,9 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
/** /**
* Return 64-bit mask representing machine capabilities * Return 64-bit mask representing machine capabilities
* Bit 0 is LSB, bit 63 MSB * Bit 0 is LSB, bit 63 MSB
* Bit 0 represents sse4.2 availability * Bit 0 represents sse4.1 availability
* Bit 1 represents AVX availability * Bit 1 represents sse4.2 availability
* Bit 2 represents AVX availability
*/ */
public native long jniGetMachineType(); public native long jniGetMachineType();
@ -125,6 +126,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
//Useful if someone builds his/her own library and wants to override the bundled //Useful if someone builds his/her own library and wants to override the bundled
//implementation without modifying the Java code //implementation without modifying the Java code
System.loadLibrary("VectorLoglessPairHMM"); System.loadLibrary("VectorLoglessPairHMM");
logger.info("libVectorLoglessPairHMM found in JVM library path");
} }
catch(UnsatisfiedLinkError ule) catch(UnsatisfiedLinkError ule)
{ {
@ -133,6 +135,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
{ {
logger.info("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from StingUtils.jar"); logger.info("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from StingUtils.jar");
loadLibraryFromJar("/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so"); loadLibraryFromJar("/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so");
logger.info("libVectorLoglessPairHMM unpacked successfully from StingUtils.jar");
} }
catch(IOException ioe) catch(IOException ioe)
{ {

View File

@ -2,47 +2,70 @@ Implementation overview:
Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and
overrides functions from both LoglessPairHMM and PairHMM. overrides functions from both LoglessPairHMM and PairHMM.
1. Constructor: Call base class constructors. Then, load the native library located in this 1. Constructor: Call base class constructors. Then, load the native library located in this
directory and call a global init function in the library to determine fields ids for the directory and call an init function (with suffix 'jniInitializeClassFieldsAndMachineMask') in the
members of classes JNIReadDataHolder and JNIHaplotypeDataHolders. library to determine fields ids for the members of classes JNIReadDataHolder and
2. When the library is loaded, it initializes two global function pointers to point to the JNIHaplotypeDataHolders. The native code stores the field ids (struct offsets) for the classes and
function implementation that is supported on the machine on which the program is being re-uses them for subsequent computations. Optionally, the user can disable the vector
run. The two pointers are for float and double respectively. This initialization is done implementation, by using the 'mask' argument (see comments for a more detailed explanation).
only once for the whole program. 2. When the library is loaded, it invokes the constructor of the class LoadTimeInitializer (because
3. initialize(): To initialized the region for PairHMM. Pass haplotype bases to native a global variable g_load_time_initializer is declared in the library). This constructor
code through the JNIHaplotypeDataHolders class. Since the haplotype list is common across multiple (LoadTimeInitializer.cc) can be used to perform various initializations. Currently, it initializes
samples in computeReadLikelihoods(), we can store the haplotype bases to the native code once and two global function pointers to point to the function implementation that is supported on the
re-use across multiple samples. machine (AVX/SSE/un-vectorized) on which the program is being run. The two pointers are for float
and double respectively. The global function pointers are declared in utils.cc and are assigned in
the function initialize_function_pointers() defined in utils.cc and invoked from the constructor of
LoadTimeInitializer.
Other initializations in LoadTimeInitializer:
* ConvertChar::init - sets some masks for the vector implementation
* FTZ for performance
* stat counters = 0
* debug structs (which are never used in non-debug mode)
This initialization is done only once for the whole program.
3. initialize(): To initialize the region for PairHMM. Pass haplotype bases to native code through
the JNIHaplotypeDataHolder class. Since the haplotype list is common across multiple samples in
computeReadLikelihoods(), we can pass the haplotype bases to the native code once and re-use across
multiple samples.
4. computeLikelihoods(): Copies array references for readBases/quals etc to array of 4. computeLikelihoods(): Copies array references for readBases/quals etc to array of
JNIReadDataHolder objects. Invokes the JNI function to perform the computation and JNIReadDataHolder objects. Invokes the JNI function to perform the computation and updates the
updates the likelihoodMap. likelihoodMap.
The JNI function copies the byte array references into an array of testcase structs and invokes the
compute_full_prob function through the function pointers initialized earlier.
The primary native function called is
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It uses
standard JNI calls to get and return data from/to the Java class VectorLoglessPairHMM. The last
argument to the function is the maximum number of OpenMP threads to use while computing PairHMM in
C++. This option is set when the native function call is made from JNILoglessPairHMM
computeLikelihoods - currently it is set to 12 (no logical reason).
Note: OpenMP has been disabled for now - insufficient #testcases per call to computeLikelihoods() to
justify multi-threading.
5. finalizeRegion(): Releases the haplotype arrays initialized in step 3 - should be called at the
end of every region (line 351 in PairHMMLikelihoodCalculationEngine).
Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java. Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java.
On the C++ side, the primary function called is
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It
uses standard JNI calls to get and return data from/to the Java class
VectorLoglessPairHMM. The last argument to the function is the maximum number of OpenMP
threads to use while computing PairHMM in C++. This option is set when the native function
call is made from JNILoglessPairHMM computeLikelihoods - currently it is set to 12 (no
logical reason).
Note: OpenMP has been disabled for now.
Compiling: Compiling:
Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX
support all AVX intrinsics. intrinsics.
Type 'make'. This should create a library called libVectorLoglessPairHMM.so This native library is called libVectorLoglessPairHMM.so
Using Maven:
Type 'mvn install' in this directory - this will build the library (by invoking 'make') and copy the
native library to the directory
${sting-utils.basedir}/src/main/resources/org/broadinstitute/sting/utils/pairhmm
The GATK maven build process (when run) will bundle the library into the StingUtils jar file from
the copied directory.
Simple build:
cd src/main/c++
make
Running: Running:
The default implementation of PairHMM is still LOGLESS_CACHING in HaplotypeCaller.java. To use the The default implementation of PairHMM is now VECTOR_LOGLESS_CACHING in HaplotypeCaller.java. To use
native version, use the command line argument "--pair_hmm_implementation VECTOR_LOGLESS_CACHING" the Java version, use the command line argument "--pair_hmm_implementation LOGLESS_CACHING". (see
(see run.sh in src/main/c++). run.sh in src/main/c++).
The native library is bundled with the StingUtils jar file. When HaplotypeCaller is invoked with the The native library is bundled with the StingUtils jar file. When HaplotypeCaller is invoked, then
VectorLoglessPairHMM implementation (see run.sh in the directory src/main/c++), then the library is the library is unpacked from the jar file, copied to the /tmp directory (with a unique id) and
unpacked from the jar file, copied to the /tmp directory (with a unique id) and loaded by the Java class loaded by the Java class VectorLoglessPairHMM in the constructor (if it has not been loaded
VectorLoglessPairHMM in the constructor (if it has not been loaded already). already).
The default library can be overridden by using the -Djava.library.path argument for the JVM to pass The default library can be overridden by using the -Djava.library.path argument (see
the path to the library. If the library libVectorLoglessPairHMM.so can be found in src/main/c++/run.sh for an example) for the JVM to pass the path to the library. If the library
java.library.path, then it is loaded and the 'packed' library is not used. libVectorLoglessPairHMM.so can be found in java.library.path, then it is loaded and the 'packed'
See run.sh in this directory on how to invoke HaplotypeCaller with the vector implementation of library is not used.
PairHMM. The argument -Djava.library.path is needed if you wish to override the default packed
library, else unnecessary.

View File

@ -98,6 +98,8 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
} }
} }
//Create a vector of testcases for computation - copy the references to bytearrays read/readQuals etc into the appropriate
//testcase struct
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector
(JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray, (JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray,
vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector, vector<testcase>& tc_array) vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector, vector<testcase>& tc_array)
@ -272,12 +274,13 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.m_data_transfer_time += diff_time(start_time); g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
#endif #endif
//Get double array where results are stored (to pass back to java)
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy); jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
#ifdef ENABLE_ASSERTIONS #ifdef ENABLE_ASSERTIONS
assert(likelihoodDoubleArray && "likelihoodArray is NULL"); assert(likelihoodDoubleArray && "likelihoodArray is NULL");
assert(env->GetArrayLength(likelihoodArray) == numTestCases); assert(env->GetArrayLength(likelihoodArray) == numTestCases);
#endif #endif
#ifdef DO_WARMUP #ifdef DO_WARMUP //ignore - only for crazy profiling
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector; vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
for(unsigned i=0;i<haplotypeBasesArrayVector.size();++i) for(unsigned i=0;i<haplotypeBasesArrayVector.size();++i)
{ {
@ -299,7 +302,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0); g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
get_time(&start_time); get_time(&start_time);
#endif #endif
compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse); compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse); //actual computation
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_compute_time += diff_time(start_time); g_load_time_initializer.m_compute_time += diff_time(start_time);
#endif #endif