Previous The sgemm_intrinsics_pitch.cpp file |
Parent Intrinsics implementation with a pitch |
Outline | Next The CMakeLists.txt file |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
#include <iostream> #include "string.h" #include "asterics_hpc.h" #include "sgemm_intrinsics_pitch.h" using namespace std; ///Get the number of cycles per elements of the reduction /** @param nbElement : number of elements of the tables * @param nbRepetition : number of repetition to evaluate the function reduction */ void evaluateSgemm(long unsigned int nbElement, long unsigned int nbRepetition){ float * matX = asterics_malloc2f(nbElement, nbElement); float * matY = asterics_malloc2f(nbElement, nbElement); float * matOut = asterics_malloc2f(nbElement, nbElement); long unsigned int pitch(getPitch(nbElement)); long unsigned int sizeCol(nbElement + pitch); for(long unsigned int i(0lu); i < nbElement; ++i){ for(long unsigned int j(0lu); j < nbElement; ++j){ long unsigned int index(i*nbElement + j); matX[i*sizeCol + j] = (float)(index*32lu%17lu); matY[i*sizeCol + j] = (float)(index*77lu%7lu); } } long unsigned int beginTime(rdtsc()); for(long unsigned int i(0lu); i < nbRepetition; ++i){ sgemm(matOut, matX, matY, nbElement, pitch); } long unsigned int elapsedTime((double)(rdtsc() - beginTime)/((double)nbRepetition)); double cyclePerElement(((double)elapsedTime)/((double)(nbElement*nbElement))); cout << "evaluateSgemm : nbElement = "<<nbElement<<", cyclePerElement = " << cyclePerElement << " cy/el, elapsedTime = " << elapsedTime << " cy" << endl; cerr << nbElement << "\t" << cyclePerElement << "\t" << elapsedTime << endl; asterics_free(matOut); asterics_free(matY); asterics_free(matX); } int main(int argc, char** argv){ cout << "SGEMM Intrinsics Pitch" << endl; evaluateSgemm(10lu, 100000lu); evaluateSgemm(16lu, 100000lu); evaluateSgemm(24lu, 100000lu); evaluateSgemm(32lu, 100000lu); evaluateSgemm(40lu, 100000lu); evaluateSgemm(56lu, 10000lu); evaluateSgemm(80lu, 10000lu); evaluateSgemm(90lu, 10000lu); evaluateSgemm(104lu, 10000lu); return 0; } |
Previous The sgemm_intrinsics_pitch.cpp file |
Parent Intrinsics implementation with a pitch |
Outline | Next The CMakeLists.txt file |