5.3.2.1.4 : Le fichier complet


Le fichier main_intrinsics_link_block.cpp complet :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/***************************************
	Auteur : Pierre Aubert
	Mail : aubertp7@gmail.com
	Licence : CeCILL-C
****************************************/

#include "micro_benchmark.h"

#include "temporary_alloc.h"

#include "intrinsics_propagation_link_block.h"

///Get the number of nanoseconds per elements
/**	@param nbElement : number of elements of the tables
*/
void evaluateGrayScott(size_t nbElement){

	size_t nbRow(nbElement*PLIB_VECTOR_SIZE_FLOAT*10lu);
	size_t nbCol(nbElement*PLIB_VECTOR_SIZE_FLOAT*2lu*10lu);

	nbElement = nbRow*nbCol;

	PTensor<float> tmpInU, tmpInV, tmpOutU, tmpOutV;
	float *tmpU1 = NULL, *tmpU2 = NULL, *tmpV1 = NULL, *tmpV2 = NULL;
	allocate_temporary(tmpU1, tmpU2, tmpV1, tmpV2, tmpInU, tmpInV, tmpOutU, tmpOutV, nbRow, nbCol);

	float diffudionRateU(0.1f), diffusionRateV(0.05f);
	float killRate(0.054f), feedRate(0.014f), dt(1.0f);
	long nbStencilRow(3l), nbStencilCol(3l);

	float matDeltaSquare[] = 	{1.0f, 1.0f, 1.0f,
					1.0f, 1.0f, 1.0f,
					1.0f, 1.0f, 1.0f};

	//Let's convert these temporaries into intrinsics temporaries
	PTensor<float> tmpVecInU(AllocMode::ALIGNED), tmpVecInV(AllocMode::ALIGNED), tmpVecOutU(AllocMode::ALIGNED), tmpVecOutV(AllocMode::ALIGNED);
	tmpVecInU.fromScalToVecNeigbhour(tmpInU, PLIB_VECTOR_SIZE_FLOAT);
	tmpVecInV.fromScalToVecNeigbhour(tmpInV, PLIB_VECTOR_SIZE_FLOAT);
	tmpVecOutU.fromScalToVecNeigbhour(tmpOutU, PLIB_VECTOR_SIZE_FLOAT);
	tmpVecOutV.fromScalToVecNeigbhour(tmpOutV, PLIB_VECTOR_SIZE_FLOAT);

	PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT);
	reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT);

	float * ptrVecMatStencil = vecMatDeltaSquare.getData();

// 	size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol());
	size_t blockSizeRow(72lu), blockSizeCol(127lu);

	std::vector<PBlock<float> > vecBlockOutU, vecBlockOutV, vecBlockInU, vecBlockInV;
	tmpVecOutU.splitBlockLink(vecBlockOutU, blockSizeRow, blockSizeCol, 1lu);
	tmpVecOutV.splitBlockLink(vecBlockOutV, blockSizeRow, blockSizeCol, 1lu);
	tmpVecInU.splitBlockLink(vecBlockInU, blockSizeRow, blockSizeCol, 1lu);
	tmpVecInV.splitBlockLink(vecBlockInV, blockSizeRow, blockSizeCol, 1lu);

	micro_benchmarkAutoNsPrint("evaluate GrayScott reaction, intrinsics link block", nbElement, grayscott_propagation_link_block, 
					vecBlockOutU, vecBlockOutV, vecBlockInU, vecBlockInV,
					ptrVecMatStencil, nbStencilRow, nbStencilCol,
					diffudionRateU, diffusionRateV, feedRate, killRate, dt);

}

///Entry point of the program : forwards the command line arguments and evaluateGrayScott to micro_benchmarkParseArg
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	int exitStatus = micro_benchmarkParseArg(argc, argv, evaluateGrayScott);
	return exitStatus;
}


Vous pouvez le télécharger ici.