6.7.1 : The C++ kernel

Here, we only have to extract the kernel from the main_intrinsics.cpp file.

The full hadamard_intrinsics_pitch.h file:
/***************************************
	Auteur : Pierre Aubert
	Mail : aubertp7@gmail.com
	Licence : CeCILL-C
****************************************/

#ifndef __HADAMARD_INTRINSICS_PITCH_H__
#define __HADAMARD_INTRINSICS_PITCH_H__


///Do the Hadamard product (element-wise product) of two tables of float
/**	@param[out] tabResult : table of results of tabX*tabY
 * 	@param tabX : input table
 * 	@param tabY : input table
 * 	@param nbElement : number of elements in the tables
 * 	NOTE(review) : the definition reads and writes the tables with aligned AVX loads/stores
 * 	(_mm256_load_ps / _mm256_store_ps), so the three tables are expected to be aligned on 32 bytes
*/
void hadamard_product(float* tabResult, const float* tabX, const float* tabY, long unsigned int nbElement);



#endif
You can download it here.

The full hadamard_intrinsics_pitch.cpp file:
/***************************************
	Auteur : Pierre Aubert
	Mail : aubertp7@gmail.com
	Licence : CeCILL-C
****************************************/

#include <immintrin.h>

#include <string.h>
#include "hadamard_intrinsics_pitch.h"

///Do the Hadamard product (element-wise product) of two tables of float
/**	@param[out] tabResult : table of results of tabX*tabY (tabResult[i] = tabX[i]*tabY[i])
 * 	@param tabX : input table
 * 	@param tabY : input table
 * 	@param nbElement : number of elements in the tables
 * 	The three tables have to be aligned on 32 bytes, because the vectorized part uses
 * 	aligned AVX loads and stores (_mm256_load_ps / _mm256_store_ps).
 * 	The elements which do not fill a complete AVX register are computed one by one,
 * 	so nbElement does not have to be a multiple of the register size.
*/
void hadamard_product(float* tabResult, const float* tabX, const float* tabY, long unsigned int nbElement){
	//The number of float per register is taken from the register type itself,
	//so the loop step always matches the __m256 used below, whatever the alignment macro is
	const long unsigned int vecSize(sizeof(__m256)/sizeof(float));
	const long unsigned int nbVec(nbElement/vecSize);
	//Vectorized part : one full AVX register (vecSize float) per iteration
	for(long unsigned int i(0lu); i < nbVec; ++i){
		const long unsigned int offset(i*vecSize);
		__m256 vecX = _mm256_load_ps(tabX + offset);
		__m256 vecY = _mm256_load_ps(tabY + offset);
		__m256 vecRes = _mm256_mul_ps(vecX, vecY);
		_mm256_store_ps(tabResult + offset, vecRes);
	}
	//Scalar part : the last (nbElement % vecSize) elements which do not fill a full register
	for(long unsigned int i(nbVec*vecSize); i < nbElement; ++i){
		tabResult[i] = tabX[i]*tabY[i];
	}
}
You can download it here.