diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
index 0c984b4c3..0f2c61290 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
@@ -98,7 +98,7 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
                                    }
                                    catch(UnsatisfiedLinkError ule)
                                    {
-                                       logger.info("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
+                                       logger.warn("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
                                        return new LoglessPairHMM();
                                    }
                     case DEBUG_VECTOR_LOGLESS_CACHING:
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java
index beaca115a..ea93ebe4a 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java
@@ -449,21 +449,7 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
     })
     @Ensures("constantsAreInitialized")
     protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
-        for (int i = 0; i < insertionGOP.length; i++) {
-            final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
-            transition[i+1][matchToMatch] = QualityUtils.qualToProb((byte) qualIndexGOP);
-            transition[i+1][indelToMatch] = QualityUtils.qualToProb(overallGCP[i]);
-            transition[i+1][matchToInsertion] = QualityUtils.qualToErrorProb(insertionGOP[i]);
-            transition[i+1][insertionToInsertion] = QualityUtils.qualToErrorProb(overallGCP[i]);
-            transition[i+1][matchToDeletion] = QualityUtils.qualToErrorProb(deletionGOP[i]);
-            transition[i+1][deletionToDeletion] = QualityUtils.qualToErrorProb(overallGCP[i]);
-
-            //TODO it seems that it is not always the case that matchToMatch + matchToDeletion + matchToInsertion == 1.
-            //TODO We have detected cases of 1.00002 which can cause problems downstream. This are typically masked
-            //TODO by the fact that we always add a indelToMatch penalty to all PairHMM likelihoods (~ -0.1)
-            //TODO This is in fact not well justified and although it does not have any effect (since is equally added to all
-            //TODO haplotypes likelihoods) perhaps we should just remove it eventually and fix this != 1.0 issue here.
-        }
+        PairHMMModel.qualToTransProbs(transition,insertionGOP,deletionGOP,overallGCP);
     }
 
     /**
diff --git a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc
index 0fec4c8da..3aacefe72 100644
--- a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc
+++ b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc
@@ -14,7 +14,6 @@ char* LoadTimeInitializerStatsNames[] =
   "dummy"
 };
 
-
 LoadTimeInitializer g_load_time_initializer;
 
 LoadTimeInitializer::LoadTimeInitializer()		//will be called when library is loaded
@@ -54,6 +53,10 @@ LoadTimeInitializer::LoadTimeInitializer()		//will be called when library is loa
   
   initialize_function_pointers();
 
+  //Initialize static members of class
+  Context<float>::initializeStaticMembers();
+  Context<double>::initializeStaticMembers();
+
   cout.flush();
 }
 
diff --git a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h
index 778ae221f..1834405a5 100644
--- a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h
+++ b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.h
@@ -22,7 +22,10 @@ class LoadTimeInitializer
 {
   public:
     LoadTimeInitializer();		//will be called when library is loaded
-    ~LoadTimeInitializer()  { delete m_buffer;	}
+    ~LoadTimeInitializer()
+    {
+      delete m_buffer;
+    }
     void print_profiling();
     void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
     void debug_close();
diff --git a/public/VectorPairHMM/src/main/c++/Makefile b/public/VectorPairHMM/src/main/c++/Makefile
index e1e443d2e..da91149e9 100644
--- a/public/VectorPairHMM/src/main/c++/Makefile
+++ b/public/VectorPairHMM/src/main/c++/Makefile
@@ -4,7 +4,7 @@
 #CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q            -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
 #CFLAGS=-O2             -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
 
-JRE_HOME?=/opt/jdk1.7.0_25/
+JRE_HOME?=/opt/jdk1.7.0_25/jre
 JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux
 
 COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
diff --git a/public/VectorPairHMM/src/main/c++/baseline.cc b/public/VectorPairHMM/src/main/c++/baseline.cc
index 290a3a4a2..66aeacbc4 100644
--- a/public/VectorPairHMM/src/main/c++/baseline.cc
+++ b/public/VectorPairHMM/src/main/c++/baseline.cc
@@ -2,8 +2,8 @@
 #include "template.h"
 #include "utils.h"
 #include "LoadTimeInitializer.h"
-
 using namespace std;
+
 template<class NUMBER>
 NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
 {
@@ -62,7 +62,8 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
     int _i = tc->i[r-1] & 127;
     int _d = tc->d[r-1] & 127;
     int _c = tc->c[r-1] & 127;
-    p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
+    //p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
+    SET_MATCH_TO_MATCH_PROB(p[r][MM], _i, _d);
     p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
     p[r][MX] = ctx.ph2pr[_i];
     p[r][XX] = ctx.ph2pr[_c];
diff --git a/public/VectorPairHMM/src/main/c++/pairhmm-template-kernel.cc b/public/VectorPairHMM/src/main/c++/pairhmm-template-kernel.cc
index a2111b6b6..77394f457 100644
--- a/public/VectorPairHMM/src/main/c++/pairhmm-template-kernel.cc
+++ b/public/VectorPairHMM/src/main/c++/pairhmm-template-kernel.cc
@@ -116,7 +116,8 @@ template<class NUMBER> void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECIS
         int _d = tc->d[r-1] & 127;
         int _c = tc->c[r-1] & 127;
 
-        *(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
+        //*(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
+        SET_MATCH_TO_MATCH_PROB(*(ptr_p_MM+r-1), _i, _d);
         *(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
         *(ptr_p_MX+r-1) = ctx.ph2pr[_i];
         *(ptr_p_XX+r-1) = ctx.ph2pr[_c];
diff --git a/public/VectorPairHMM/src/main/c++/run.sh b/public/VectorPairHMM/src/main/c++/run.sh
index 7cc240c9d..e821497f1 100755
--- a/public/VectorPairHMM/src/main/c++/run.sh
+++ b/public/VectorPairHMM/src/main/c++/run.sh
@@ -8,8 +8,8 @@ then
 fi
 
 #-Djava.library.path is needed if you wish to override the default 'packed' library
-#java  -Djava.library.path=${GSA_ROOT_DIR}/public/VectorPairHMM/src/main/c++ -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar   -T HaplotypeCaller \
-java -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar   -T HaplotypeCaller \
+#java -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar   -T HaplotypeCaller \
+java  -Djava.library.path=${GSA_ROOT_DIR}/public/VectorPairHMM/src/main/c++ -jar $GSA_ROOT_DIR/target/GenomeAnalysisTK.jar   -T HaplotypeCaller \
 --dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
 -R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
 -I /data/simulated/sim1M_pairs_final.bam \
diff --git a/public/VectorPairHMM/src/main/c++/template.h b/public/VectorPairHMM/src/main/c++/template.h
index f91c2300e..3e52854a6 100644
--- a/public/VectorPairHMM/src/main/c++/template.h
+++ b/public/VectorPairHMM/src/main/c++/template.h
@@ -94,57 +94,158 @@ typedef union ALIGNED
         ALIGNED double  ALIGNED f;
 } ALIGNED IF_64 ALIGNED;
 
-template<class T>
-struct Context{};
+
+#define MAX_QUAL 254
+#define MAX_JACOBIAN_TOLERANCE 8.0
+#define JACOBIAN_LOG_TABLE_STEP 0.0001
+#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
+#define MAXN 70000
+#define LOG10_CACHE_SIZE  (4*MAXN)  // we need to be able to go up to 2*(2N) when calculating some of the coefficients
+#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
+
+template<class NUMBER>
+struct ContextBase
+{
+  public:
+    NUMBER ph2pr[128];
+    NUMBER INITIAL_CONSTANT;
+    NUMBER LOG10_INITIAL_CONSTANT;
+    NUMBER RESULT_THRESHOLD; 
+
+    static bool staticMembersInitializedFlag;
+    static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
+    static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
+
+    static void initializeStaticMembers()
+    {
+      if(!staticMembersInitializedFlag)
+      {
+        //Order of calls important - Jacobian first, then MatchToMatch
+        initializeJacobianLogTable();
+        initializeMatchToMatchProb();
+        staticMembersInitializedFlag = true;
+      }
+    }
+
+    static void deleteStaticMembers()
+    {
+      if(staticMembersInitializedFlag)
+      {
+        staticMembersInitializedFlag = false;
+      }
+    }
+
+    //Called only once during library load - don't bother to optimize with single precision fp
+    static void initializeJacobianLogTable()
+    {
+      for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
+        jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)));
+      }
+    }
+
+    //Called only once per library load - don't bother optimizing with single fp
+    static void initializeMatchToMatchProb()
+    {
+      double LN10 = log(10);
+      double INV_LN10 = 1.0/LN10;
+      for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i)
+        for (int j = 0; j <= i; j++) {
+          double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j);
+          double matchToMatchLog10 =
+            log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10;
+          matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10));
+        }
+    }
+    //Called during computation - use single precision where possible
+    static int fastRound(NUMBER d) {
+      return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5));
+    }
+    //Called during computation - use single precision where possible
+    static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) {
+      // make sure small is really the smaller value
+      if (small > big) {
+        NUMBER t = big;
+        big = small;
+        small = t;
+      }
+
+      if (isinf(small) == -1 || isinf(big) == -1)
+        return big;
+
+      NUMBER diff = big - small;
+      if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE))
+        return big;
+
+      // OK, so |y-x| < tol: we use the following identity then:
+      // we need to compute log10(10^x + 10^y)
+      // By Jacobian logarithm identity, this is equal to
+      // max(x,y) + log10(1+10^-abs(x-y))
+      // we compute the second term as a table lookup with integer quantization
+      // we have pre-stored correction for 0,0.1,0.2,... 10.0
+      int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding
+      return big + jacobianLogTable[ind];
+    }
+};
+
+template<class NUMBER>
+struct Context : public ContextBase<NUMBER>
+{};
 
 template<>
-struct Context<double>
+struct Context<double> : public ContextBase<double>
 {
-        Context()
-        {
-                for (int x = 0; x < 128; x++)
-                        ph2pr[x] = pow(10.0, -((double)x) / 10.0);
+  Context():ContextBase<double>()
+  {
+    for (int x = 0; x < 128; x++)
+      ph2pr[x] = pow(10.0, -((double)x) / 10.0);
 
-                INITIAL_CONSTANT = ldexp(1.0, 1020.0);
-                LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
-                RESULT_THRESHOLD = 0.0;
-        }
+    INITIAL_CONSTANT = ldexp(1.0, 1020.0);
+    LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
+    RESULT_THRESHOLD = 0.0;
+  }
 
-        double LOG10(double v){ return log10(v); }
+  double LOG10(double v){ return log10(v); }
+  inline double POW(double b, double e) { return pow(b,e); }
 
-        static double _(double n){ return n; }
-        static double _(float n){ return ((double) n); }
-        double ph2pr[128];
-        double INITIAL_CONSTANT;
-        double LOG10_INITIAL_CONSTANT;
-        double RESULT_THRESHOLD;
+  static double _(double n){ return n; }
+  static double _(float n){ return ((double) n); }
 };
 
 template<>
-struct Context<float>
+struct Context<float> : public ContextBase<float>
 {
-        Context()
-        {
-                for (int x = 0; x < 128; x++)
-                {
-                        ph2pr[x] = powf(10.f, -((float)x) / 10.f);
-                }
+  Context() : ContextBase<float>()
+  {
+    for (int x = 0; x < 128; x++)
+    {
+      ph2pr[x] = powf(10.f, -((float)x) / 10.f);
+    }
 
-                INITIAL_CONSTANT = ldexpf(1.f, 120.f);
-                LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
-                RESULT_THRESHOLD = ldexpf(1.f, -110.f);
-        }
+    INITIAL_CONSTANT = ldexpf(1.f, 120.f);
+    LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
+    RESULT_THRESHOLD = ldexpf(1.f, -110.f);
+  }
 
-        float LOG10(float v){ return log10f(v); }
+  float LOG10(float v){ return log10f(v); }
+  inline float POW(float b, float e) { return powf(b,e); }
 
-        static float _(double n){ return ((float) n); }
-        static float _(float n){ return n; }
-        float ph2pr[128];
-        float INITIAL_CONSTANT;
-        float LOG10_INITIAL_CONSTANT;
-        float RESULT_THRESHOLD;
+  static float _(double n){ return ((float) n); }
+  static float _(float n){ return n; }
 };
 
+#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual)                       \
+{                                                                               \
+  int minQual = delQual;                                                        \
+  int maxQual = insQual;                                                        \
+  if (insQual <= delQual)                                                       \
+  {                                                                             \
+    minQual = insQual;                                                          \
+    maxQual = delQual;                                                          \
+  }                                                                             \
+  (output) = (MAX_QUAL < maxQual) ?                                             \
+  ((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual))       \
+  : ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual];           \
+}
 
 typedef struct
 {
diff --git a/public/VectorPairHMM/src/main/c++/utils.cc b/public/VectorPairHMM/src/main/c++/utils.cc
index 387a6b4a0..35871948c 100644
--- a/public/VectorPairHMM/src/main/c++/utils.cc
+++ b/public/VectorPairHMM/src/main/c++/utils.cc
@@ -3,12 +3,21 @@
 #include "utils.h"
 #include "vector_defs.h"
 #include "LoadTimeInitializer.h"
+using namespace std;
 
+//static members from ConvertChar
 uint8_t ConvertChar::conversionTable[255];
+//Global function pointers in utils.h
 float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
 double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
+//Static members in ContextBase
+bool ContextBase<double>::staticMembersInitializedFlag = false;
+double ContextBase<double>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
+double ContextBase<double>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
+bool ContextBase<float>::staticMembersInitializedFlag = false;
+float ContextBase<float>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
+float ContextBase<float>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
 
-using namespace std;
 
 bool is_avx_supported()
 {
diff --git a/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so b/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so
index 07fcc79d0..17cec8be4 100644
Binary files a/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so and b/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so differ