Previous The reduction_intrinsics.h file |
Parent The vectorization of reduction with intrinsic functions |
Outline | Next The main_intrinsics.cpp |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
#include <immintrin.h> #include "reduction_intrinsics.h" ///Do the Reduction /** @param ptabValue : input table * @param nbElement : number of elements in the input table * @return sum of all the elements of the input table */ float reduction(const float * tabValue, long unsigned int nbElement){ long unsigned int vecSize(VECTOR_ALIGNEMENT/sizeof(float)); long unsigned int nbVec(nbElement/vecSize); float res(0.0f); __m256 vecRes = _mm256_broadcast_ss(&res); for(long unsigned int i(0lu); i < nbVec; ++i){ __m256 vecValue = _mm256_load_ps(tabValue + i*vecSize); vecRes = _mm256_add_ps(vecRes, vecValue); } float tmp[8lu]; _mm256_storeu_ps(tmp, vecRes); for(long unsigned int i(0lu); i < 8lu; ++i){ res += tmp[i]; } return res; } |
Previous The reduction_intrinsics.h file |
Parent The vectorization of reduction with intrinsic functions |
Outline | Next The main_intrinsics.cpp |