89 lines
2.6 KiB
C
89 lines
2.6 KiB
C
#ifdef PRECISION
|
|
|
|
#ifdef SIMD_ENGINE_AVX
|
|
|
|
/*
 * AVX variant: shift the contents of the 256-bit vector x by one element,
 * feeding shiftIn into the bottom of the lower 128-bit half and reporting
 * the element taken from the upper half through shiftOut.
 *
 *   x        - vector union, updated in place
 *   shiftIn  - scalar moved into the lower half after the shift
 *   shiftOut - receives the element of the upper half selected by
 *              SHIFT_CONST2, read before x is modified
 *
 * NOTE(review): the SHIFT_CONST* values and the VEC_* helpers are defined
 * elsewhere; the lane descriptions below follow the original comments
 * ("xlow[3]", "movss") and should be confirmed against those definitions.
 */
inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
{
    /* Split x into its lower (cast) and upper (extract index 1) halves. */
    IF_128 lo, hi;
    lo.f = VEC_CAST_256_128(x.d);
    hi.f = VEC_EXTRACT_128(x.d, 1);

    /* Hand the outgoing element of the upper half back to the caller. */
    IF_MAIN_TYPE outgoing;
    outgoing.i = VEC_EXTRACT_UNIT(hi.i, SHIFT_CONST2);
    shiftOut = outgoing.f;

    /* Byte-shift the lower half right by SHIFT_CONST1 so that its top
     * element can be carried into the upper half. */
    IF_128 carry;
    carry.i = _mm_srli_si128(lo.i, SHIFT_CONST1);

    /* Byte-shift both halves left by SHIFT_CONST3 to open the bottom slot. */
    lo.i = _mm_slli_si128(lo.i, SHIFT_CONST3);
    hi.i = _mm_slli_si128(hi.i, SHIFT_CONST3);

    /* Fill the freed slots: shiftIn into the lower half, the carried
     * element into the upper half (VEC_MOVE is presumably a movss/movsd
     * style low-element move). */
    _128_TYPE incoming = VEC_SET1_VAL128(shiftIn);
    lo.f = VEC_MOVE(lo.f, incoming);
    hi.f = VEC_MOVE(hi.f, carry.f);

    /* Reassemble: widen the lower half, then insert the upper half at index 1. */
    x.d = VEC_INSERT_VAL(VEC_CAST_128_256(lo.f), hi.f, 1);
}
|
|
|
|
|
|
/*
 * AVX variant of the shift that discards the outgoing element: shift the
 * contents of the 256-bit vector x by one element and feed shiftIn into
 * the bottom of the lower 128-bit half.
 *
 *   x       - vector union, updated in place
 *   shiftIn - scalar moved into the lower half after the shift
 *
 * NOTE(review): the SHIFT_CONST* values and the VEC_* helpers are defined
 * elsewhere; the lane descriptions below follow the original comments
 * ("xlow[3]", "movss") and should be confirmed against those definitions.
 */
inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
{
    /* Split x into its lower (cast) and upper (extract index 1) halves. */
    IF_128 lo, hi;
    lo.f = VEC_CAST_256_128(x.d);
    hi.f = VEC_EXTRACT_128(x.d, 1);

    /* Byte-shift the lower half right by SHIFT_CONST1 so that its top
     * element can be carried into the upper half. */
    IF_128 carry;
    carry.i = _mm_srli_si128(lo.i, SHIFT_CONST1);

    /* Byte-shift both halves left by SHIFT_CONST3 to open the bottom slot. */
    lo.i = _mm_slli_si128(lo.i, SHIFT_CONST3);
    hi.i = _mm_slli_si128(hi.i, SHIFT_CONST3);

    /* Fill the freed slots: shiftIn into the lower half, the carried
     * element into the upper half (VEC_MOVE is presumably a movss/movsd
     * style low-element move). */
    _128_TYPE incoming = VEC_SET1_VAL128(shiftIn);
    lo.f = VEC_MOVE(lo.f, incoming);
    hi.f = VEC_MOVE(hi.f, carry.f);

    /* Reassemble: widen the lower half, then insert the upper half at index 1. */
    x.d = VEC_INSERT_VAL(VEC_CAST_128_256(lo.f), hi.f, 1);
}
|
|
|
|
#endif
|
|
|
|
#ifdef SIMD_ENGINE_SSE
|
|
|
|
/*
 * SSE variant: shift the 128-bit vector x by one element, inserting
 * shiftIn at the position given by SHIFT_CONST3 and reporting the element
 * at position SHIFT_CONST1 (read before the shift) through shiftOut.
 *
 *   x        - vector union, updated in place
 *   shiftIn  - scalar inserted after the shift
 *   shiftOut - receives the element extracted before x is modified
 *
 * NOTE(review): the original comments call the extracted element "H" and
 * the inserted one "L"; the actual lane indices depend on the SHIFT_CONST*
 * definitions, which are not visible here.
 */
inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
{
    /* Reinterpret the incoming scalar as its integer bit pattern. */
    IF_MAIN_TYPE incoming;
    incoming.f = shiftIn;

    /* Extract the outgoing element before x is disturbed. */
    IF_MAIN_TYPE outgoing;
    outgoing.i = VEC_EXTRACT_UNIT(x.i, SHIFT_CONST1);
    shiftOut = outgoing.f;

    /* Byte-shift the whole register left by SHIFT_CONST2. */
    x.i = _mm_slli_si128(x.i, SHIFT_CONST2);

    /* Drop the incoming bits into the slot selected by SHIFT_CONST3. */
    x.i = VEC_INSERT_UNIT(x.i, incoming.i, SHIFT_CONST3);
}
|
|
|
|
/*
 * SSE variant of the shift that discards the outgoing element: shift the
 * 128-bit vector x by one element and insert shiftIn at the position
 * given by SHIFT_CONST3.
 *
 *   x       - vector union, updated in place
 *   shiftIn - scalar inserted after the shift
 *
 * NOTE(review): the actual lane indices depend on the SHIFT_CONST*
 * definitions, which are not visible here.
 */
inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
{
    /* Reinterpret the incoming scalar as its integer bit pattern. */
    IF_MAIN_TYPE incoming;
    incoming.f = shiftIn;

    /* Byte-shift the whole register left by SHIFT_CONST2. */
    x.i = _mm_slli_si128(x.i, SHIFT_CONST2);

    /* Drop the incoming bits into the slot selected by SHIFT_CONST3. */
    x.i = VEC_INSERT_UNIT(x.i, incoming.i, SHIFT_CONST3);
}
|
|
|
|
#endif
|
|
|
|
#endif
|