Previous La fonction principale du programme |
Parent Notre fichier main_intrinsics_block.cpp |
Outline | Next Le fichier CMakeLists.txt |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
/***************************************
	Author : Pierre Aubert
	Mail : aubertp7@gmail.com
	License : CeCILL-C
****************************************/

#include <cstddef>
#include <vector>

#include "micro_benchmark.h"
#include "temporary_alloc.h"
#include "intrinsics_propagation_block.h"

///Benchmark the block-based intrinsics version of the Gray-Scott propagation
/**	@param nbElement : size factor of the tables. The images have
 * 		nbElement*PLIB_VECTOR_SIZE_FLOAT*10 rows and
 * 		nbElement*PLIB_VECTOR_SIZE_FLOAT*20 columns
 *
 * 	The function returns nothing: grayscott_propagation_block and all its arguments are
 * 	handed to micro_benchmarkAutoNsPrint, which presumably times the call and prints the
 * 	number of nanoseconds per element (TODO confirm in micro_benchmark.h)
*/
void evaluateGrayScott(size_t nbElement){
	//Image dimensions, both are multiples of PLIB_VECTOR_SIZE_FLOAT
	size_t nbRow(nbElement*PLIB_VECTOR_SIZE_FLOAT*10lu);
	size_t nbCol(nbElement*PLIB_VECTOR_SIZE_FLOAT*2lu*10lu);
	//From here nbElement is the total number of cells passed to the benchmark
	nbElement = nbRow*nbCol;

	PTensor<float> tmpInU, tmpInV, tmpOutU, tmpOutV;
	//These pointers are only passed to allocate_temporary, they are not read afterwards in this function
	float *tmpU1 = nullptr, *tmpU2 = nullptr, *tmpV1 = nullptr, *tmpV2 = nullptr;
	allocate_temporary(tmpU1, tmpU2, tmpV1, tmpV2, tmpInU, tmpInV, tmpOutU, tmpOutV, nbRow, nbCol);

	//Parameters of the simulation
	float diffusionRateU(0.1f), diffusionRateV(0.05f);
	float killRate(0.054f), feedRate(0.014f), dt(1.0f);

	//3x3 stencil where all the weights are 1
	long nbStencilRow(3l), nbStencilCol(3l);
	float matDeltaSquare[] = {1.0f, 1.0f, 1.0f,
				1.0f, 1.0f, 1.0f,
				1.0f, 1.0f, 1.0f};

	//Let's convert these temporaries into intrinsics temporaries
	PTensor<float> tmpVecInU(AllocMode::ALIGNED), tmpVecInV(AllocMode::ALIGNED), tmpVecOutU(AllocMode::ALIGNED), tmpVecOutV(AllocMode::ALIGNED);
	tmpVecInU.fromScalToVecNeigbhour(tmpInU, PLIB_VECTOR_SIZE_FLOAT);
	tmpVecInV.fromScalToVecNeigbhour(tmpInV, PLIB_VECTOR_SIZE_FLOAT);
	tmpVecOutU.fromScalToVecNeigbhour(tmpOutU, PLIB_VECTOR_SIZE_FLOAT);
	tmpVecOutV.fromScalToVecNeigbhour(tmpOutV, PLIB_VECTOR_SIZE_FLOAT);

	//The stencil is stored in an aligned tensor with PLIB_VECTOR_SIZE_FLOAT times more columns
	//NOTE(review): reshuffle_broadcastTensor presumably duplicates each weight over a full vector, to be confirmed
	PTensor<float> vecMatDeltaSquare(AllocMode::ALIGNED, nbStencilRow, nbStencilCol*PLIB_VECTOR_SIZE_FLOAT);
	reshuffle_broadcastTensor(vecMatDeltaSquare.getData(), matDeltaSquare, nbStencilRow, nbStencilCol, 0lu, PLIB_VECTOR_SIZE_FLOAT);
	float * ptrVecMatStencil = vecMatDeltaSquare.getData();

// 	size_t nbVecRow(tmpVecInV.getFullNbRow()), nbVecCol(tmpVecInV.getNbCol());

	//Size of the blocks and the block vectors, which are empty when given to the kernel
	size_t blockSizeRow(40lu), blockSizeCol(80lu);
	std::vector<PBlock<float> > vecBlockOutU, vecBlockOutV, vecBlockInU, vecBlockInV;

	//The label has to stay on a single line: a string literal cannot contain a raw line break
	micro_benchmarkAutoNsPrint("evaluate GrayScott reaction, intrinsics block", nbElement, grayscott_propagation_block,
			tmpVecOutU, tmpVecOutV, tmpVecInU, tmpVecInV,
			vecBlockOutU, vecBlockOutV, vecBlockInU, vecBlockInV,
			blockSizeRow, blockSizeCol,
			ptrVecMatStencil, nbStencilRow, nbStencilCol,
			diffusionRateU, diffusionRateV, feedRate, killRate, dt);
}

///Entry point of the program
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of the arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	return micro_benchmarkParseArg(argc, argv, evaluateGrayScott);
}
Previous La fonction principale du programme |
Parent Notre fichier main_intrinsics_block.cpp |
Outline | Next Le fichier CMakeLists.txt |