8.4.2 : The reduction_intrinsics.cpp file

Here is the reduction_intrinsics.cpp file:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#include <immintrin.h>
#include "reduction_intrinsics.h"

///Do the Reduction
/**	@param ptabValue : input table
 * 	@param nbElement : number of elements in the input table
 * 	@return sum of all the elements of the input table
*/
float reduction(const float * tabValue, long unsigned int nbElement){
	long unsigned int vecSize(VECTOR_ALIGNEMENT/sizeof(float));
	long unsigned int nbVec(nbElement/vecSize);
	float res(0.0f);
	__m256 vecRes = _mm256_broadcast_ss(&res);
	for(long unsigned int i(0lu); i < nbVec; ++i){
		__m256 vecValue = _mm256_load_ps(tabValue + i*vecSize);
		vecRes = _mm256_add_ps(vecRes, vecValue);
	}
	float tmp[8lu];
	_mm256_storeu_ps(tmp, vecRes);
	for(long unsigned int i(0lu); i < 8lu; ++i){
		res += tmp[i];
	}
	return res;
}