1. Added the catch UnsatisfiedLinkError exception in
PairHMMLikelihoodCalculationEngine.java to fall back to LOGLESS_CACHING in case the native library could not be loaded. Made VECTOR_LOGLESS_CACHING as the default implementation. 2. Updated the README with Mauricio's comments 3. baseline.cc is used within the library - if the machine supports neither AVX nor SSE4.1, the native library falls back to un-vectorized C++ in baseline.cc. 4. pairhmm-1-base.cc: This is not part of the library, but is being heavily used for debugging/profiling. Can I request that we keep it there for now? In the next release, we can delete it from the repository. 5. I agree with Mauricio about the ifdefs. I am sure you already know, but just to reassure you the debug code is not compiled into the library (because of the ifdefs) and will not affect performance.
This commit is contained in:
parent
2d0ce45bb0
commit
37526dfad5
|
|
@ -426,7 +426,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for genotype likelihood calculations", required = false)
|
||||
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING;
|
||||
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
|
||||
|
|
|
|||
|
|
@ -81,29 +81,37 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
private final boolean noFpga;
|
||||
|
||||
private final ThreadLocal<PairHMM> pairHMMThreadLocal = new ThreadLocal<PairHMM>() {
|
||||
@Override
|
||||
protected PairHMM initialValue() {
|
||||
switch (hmmType) {
|
||||
case EXACT: return new Log10PairHMM(true);
|
||||
case ORIGINAL: return new Log10PairHMM(false);
|
||||
case LOGLESS_CACHING:
|
||||
if (noFpga || !CnyPairHMM.isAvailable())
|
||||
return new LoglessPairHMM();
|
||||
else
|
||||
return new CnyPairHMM();
|
||||
case VECTOR_LOGLESS_CACHING:
|
||||
return new VectorLoglessPairHMM();
|
||||
case DEBUG_VECTOR_LOGLESS_CACHING:
|
||||
return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING);
|
||||
case ARRAY_LOGLESS:
|
||||
if (noFpga || !CnyPairHMM.isAvailable())
|
||||
return new ArrayLoglessPairHMM();
|
||||
else
|
||||
return new CnyPairHMM();
|
||||
default:
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, LOGLESS_CACHING, and ARRAY_LOGLESS.");
|
||||
}
|
||||
}
|
||||
@Override
|
||||
protected PairHMM initialValue() {
|
||||
switch (hmmType) {
|
||||
case EXACT: return new Log10PairHMM(true);
|
||||
case ORIGINAL: return new Log10PairHMM(false);
|
||||
case LOGLESS_CACHING:
|
||||
if (noFpga || !CnyPairHMM.isAvailable())
|
||||
return new LoglessPairHMM();
|
||||
else
|
||||
return new CnyPairHMM();
|
||||
case VECTOR_LOGLESS_CACHING:
|
||||
try
|
||||
{
|
||||
return new VectorLoglessPairHMM();
|
||||
}
|
||||
catch(UnsatisfiedLinkError ule)
|
||||
{
|
||||
logger.info("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
|
||||
return new LoglessPairHMM();
|
||||
}
|
||||
case DEBUG_VECTOR_LOGLESS_CACHING:
|
||||
return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING);
|
||||
case ARRAY_LOGLESS:
|
||||
if (noFpga || !CnyPairHMM.isAvailable())
|
||||
return new ArrayLoglessPairHMM();
|
||||
else
|
||||
return new CnyPairHMM();
|
||||
default:
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, LOGLESS_CACHING, and ARRAY_LOGLESS.");
|
||||
}
|
||||
}
|
||||
};
|
||||
// Attempted to do as below, to avoid calling pairHMMThreadLocal.get() later on, but it resulted in a NullPointerException
|
||||
// private final PairHMM pairHMM = pairHMMThreadLocal.get();
|
||||
|
|
@ -340,7 +348,7 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
|
||||
private void finalizePairHMM()
|
||||
{
|
||||
pairHMMThreadLocal.get().finalizeRegion();
|
||||
pairHMMThreadLocal.get().finalizeRegion();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -360,9 +368,9 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
|
||||
stratifiedReadMap.put(sampleEntry.getKey(), map);
|
||||
}
|
||||
|
||||
//Used mostly by the JNI implementation(s) to free arrays
|
||||
finalizePairHMM();
|
||||
|
||||
//Used mostly by the JNI implementation(s) to free arrays
|
||||
finalizePairHMM();
|
||||
|
||||
return stratifiedReadMap;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,8 +97,9 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
|||
/**
|
||||
* Return 64-bit mask representing machine capabilities
|
||||
* Bit 0 is LSB, bit 63 MSB
|
||||
* Bit 0 represents sse4.2 availability
|
||||
* Bit 1 represents AVX availability
|
||||
* Bit 0 represents sse4.1 availability
|
||||
* Bit 1 represents sse4.2 availability
|
||||
* Bit 2 represents AVX availability
|
||||
*/
|
||||
public native long jniGetMachineType();
|
||||
|
||||
|
|
@ -125,6 +126,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
|||
//Useful if someone builds his/her own library and wants to override the bundled
|
||||
//implementation without modifying the Java code
|
||||
System.loadLibrary("VectorLoglessPairHMM");
|
||||
logger.info("libVectorLoglessPairHMM found in JVM library path");
|
||||
}
|
||||
catch(UnsatisfiedLinkError ule)
|
||||
{
|
||||
|
|
@ -133,6 +135,7 @@ public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
|||
{
|
||||
logger.info("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from StingUtils.jar");
|
||||
loadLibraryFromJar("/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so");
|
||||
logger.info("libVectorLoglessPairHMM unpacked successfully from StingUtils.jar");
|
||||
}
|
||||
catch(IOException ioe)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2,47 +2,70 @@ Implementation overview:
|
|||
Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and
|
||||
overrides functions from both LoglessPairHMM and PairHMM.
|
||||
1. Constructor: Call base class constructors. Then, load the native library located in this
|
||||
directory and call a global init function in the library to determine fields ids for the
|
||||
members of classes JNIReadDataHolder and JNIHaplotypeDataHolders.
|
||||
2. When the library is loaded, it initializes two global function pointers to point to the
|
||||
function implementation that is supported on the machine on which the program is being
|
||||
run. The two pointers are for float and double respectively. This initialization is done
|
||||
only once for the whole program.
|
||||
3. initialize(): To initialized the region for PairHMM. Pass haplotype bases to native
|
||||
code through the JNIHaplotypeDataHolders class. Since the haplotype list is common across multiple
|
||||
samples in computeReadLikelihoods(), we can store the haplotype bases to the native code once and
|
||||
re-use across multiple samples.
|
||||
directory and call an init function (with suffix 'jniInitializeClassFieldsAndMachineMask') in the
|
||||
library to determine fields ids for the members of classes JNIReadDataHolder and
|
||||
JNIHaplotypeDataHolders. The native code stores the field ids (struct offsets) for the classes and
|
||||
re-uses them for subsequent computations. Optionally, the user can disable the vector
|
||||
implementation, by using the 'mask' argument (see comments for a more detailed explanation).
|
||||
2. When the library is loaded, it invokes the constructor of the class LoadTimeInitializer (because
|
||||
a global variable g_load_time_initializer is declared in the library). This constructor
|
||||
(LoadTimeInitializer.cc) can be used to perform various initializations. Currently, it initializes
|
||||
two global function pointers to point to the function implementation that is supported on the
|
||||
machine (AVX/SSE/un-vectorized) on which the program is being run. The two pointers are for float
|
||||
and double respectively. The global function pointers are declared in utils.cc and are assigned in
|
||||
the function initialize_function_pointers() defined in utils.cc and invoked from the constructor of
|
||||
LoadTimeInitializer.
|
||||
Other initializations in LoadTimeInitializer:
|
||||
* ConvertChar::init - sets some masks for the vector implementation
|
||||
* FTZ for performance
|
||||
* stat counters = 0
|
||||
* debug structs (which are never used in non-debug mode)
|
||||
This initialization is done only once for the whole program.
|
||||
3. initialize(): To initialize the region for PairHMM. Pass haplotype bases to native code through
|
||||
the JNIHaplotypeDataHolder class. Since the haplotype list is common across multiple samples in
|
||||
computeReadLikelihoods(), we can pass the haplotype bases to the native code once and re-use across
|
||||
multiple samples.
|
||||
4. computeLikelihoods(): Copies array references for readBases/quals etc to array of
|
||||
JNIReadDataHolder objects. Invokes the JNI function to perform the computation and
|
||||
updates the likelihoodMap.
|
||||
JNIReadDataHolder objects. Invokes the JNI function to perform the computation and updates the
|
||||
likelihoodMap.
|
||||
The JNI function copies the byte array references into an array of testcase structs and invokes the
|
||||
compute_full_prob function through the function pointers initialized earlier.
|
||||
The primary native function called is
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It uses
|
||||
standard JNI calls to get and return data from/to the Java class VectorLoglessPairHMM. The last
|
||||
argument to the function is the maximum number of OpenMP threads to use while computing PairHMM in
|
||||
C++. This option is set when the native function call is made from JNILoglessPairHMM
|
||||
computeLikelihoods - currently it is set to 12 (no logical reason).
|
||||
Note: OpenMP has been disabled for now - insufficient #testcases per call to computeLikelihoods() to
|
||||
justify multi-threading.
|
||||
5. finalizeRegion(): Releases the haplotype arrays initialized in step 3 - should be called at the
|
||||
end of every region (line 351 in PairHMMLikelihoodCalculationEngine).
|
||||
|
||||
Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java.
|
||||
|
||||
On the C++ side, the primary function called is
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It
|
||||
uses standard JNI calls to get and return data from/to the Java class
|
||||
VectorLoglessPairHMM. The last argument to the function is the maximum number of OpenMP
|
||||
threads to use while computing PairHMM in C++. This option is set when the native function
|
||||
call is made from JNILoglessPairHMM computeLikelihoods - currently it is set to 12 (no
|
||||
logical reason).
|
||||
Note: OpenMP has been disabled for now.
|
||||
|
||||
Compiling:
|
||||
Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to
|
||||
support all AVX intrinsics.
|
||||
Type 'make'. This should create a library called libVectorLoglessPairHMM.so
|
||||
Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX
|
||||
intrinsics.
|
||||
This native library is called libVectorLoglessPairHMM.so
|
||||
Using Maven:
|
||||
Type 'mvn install' in this directory - this will build the library (by invoking 'make') and copy the
|
||||
native library to the directory
|
||||
${sting-utils.basedir}/src/main/resources/org/broadinstitute/sting/utils/pairhmm
|
||||
The GATK maven build process (when run) will bundle the library into the StingUtils jar file from
|
||||
the copied directory.
|
||||
Simple build:
|
||||
cd src/main/c++
|
||||
make
|
||||
|
||||
Running:
|
||||
The default implementation of PairHMM is still LOGLESS_CACHING in HaplotypeCaller.java. To use the
|
||||
native version, use the command line argument "--pair_hmm_implementation VECTOR_LOGLESS_CACHING"
|
||||
(see run.sh in src/main/c++).
|
||||
The native library is bundled with the StingUtils jar file. When HaplotypeCaller is invoked with the
|
||||
VectorLoglessPairHMM implementation (see run.sh in the directory src/main/c++), then the library is
|
||||
unpacked from the jar file, copied to the /tmp directory (with a unique id) and loaded by the Java class
|
||||
VectorLoglessPairHMM in the constructor (if it has not been loaded already).
|
||||
The default library can be overridden by using the -Djava.library.path argument for the JVM to pass
|
||||
the path to the library. If the library libVectorLoglessPairHMM.so can be found in
|
||||
java.library.path, then it is loaded and the 'packed' library is not used.
|
||||
See run.sh in this directory on how to invoke HaplotypeCaller with the vector implementation of
|
||||
PairHMM. The argument -Djava.library.path is needed if you wish to override the default packed
|
||||
library, else unnecessary.
|
||||
The default implementation of PairHMM is now VECTOR_LOGLESS_CACHING in HaplotypeCaller.java. To use
|
||||
the Java version, use the command line argument "--pair_hmm_implementation LOGLESS_CACHING". (see
|
||||
run.sh in src/main/c++).
|
||||
The native library is bundled with the StingUtils jar file. When HaplotypeCaller is invoked, then
|
||||
the library is unpacked from the jar file, copied to the /tmp directory (with a unique id) and
|
||||
loaded by the Java class VectorLoglessPairHMM in the constructor (if it has not been loaded
|
||||
already).
|
||||
The default library can be overridden by using the -Djava.library.path argument (see
|
||||
src/main/c++/run.sh for an example) for the JVM to pass the path to the library. If the library
|
||||
libVectorLoglessPairHMM.so can be found in java.library.path, then it is loaded and the 'packed'
|
||||
library is not used.
|
||||
|
|
|
|||
|
|
@ -98,6 +98,8 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
}
|
||||
}
|
||||
|
||||
//Create a vector of testcases for computation - copy the references to bytearrays read/readQuals etc into the appropriate
|
||||
//testcase struct
|
||||
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector
|
||||
(JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray,
|
||||
vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector, vector<testcase>& tc_array)
|
||||
|
|
@ -272,12 +274,13 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
|
||||
#endif
|
||||
|
||||
//Get double array where results are stored (to pass back to java)
|
||||
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
|
||||
#ifdef ENABLE_ASSERTIONS
|
||||
assert(likelihoodDoubleArray && "likelihoodArray is NULL");
|
||||
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
|
||||
#endif
|
||||
#ifdef DO_WARMUP
|
||||
#ifdef DO_WARMUP //ignore - only for crazy profiling
|
||||
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
|
||||
for(unsigned i=0;i<haplotypeBasesArrayVector.size();++i)
|
||||
{
|
||||
|
|
@ -299,7 +302,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
|
||||
get_time(&start_time);
|
||||
#endif
|
||||
compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse);
|
||||
compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse); //actual computation
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_compute_time += diff_time(start_time);
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue