Previous The sgemm_intrinsics.cpp file |
Parent Intrinsics implementation |
Outline | Next The CMakeLists.txt file |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
#include <iostream> #include "asterics_hpc.h" #include "sgemm_intrinsics.h" using namespace std; ///Get the number of cycles per elements of the reduction /** @param nbElement : number of elements of the tables * @param nbRepetition : number of repetition to evaluate the function reduction */ void evaluateSgemm(long unsigned int nbElement, long unsigned int nbRepetition){ float * matX = (float*)asterics_malloc(sizeof(float)*nbElement*nbElement); float * matY = (float*)asterics_malloc(sizeof(float)*nbElement*nbElement); float * matOut = (float*)asterics_malloc(sizeof(float)*nbElement*nbElement); for(long unsigned int i(0lu); i < nbElement*nbElement; ++i){ matX[i] = (float)(i*32lu%17lu); matY[i] = (float)(i*77lu%7lu); } long unsigned int beginTime(rdtsc()); for(long unsigned int i(0lu); i < nbRepetition; ++i){ sgemm(matOut, matX, matY, nbElement); } long unsigned int elapsedTime((double)(rdtsc() - beginTime)/((double)nbRepetition)); double cyclePerElement(((double)elapsedTime)/((double)(nbElement*nbElement))); cout << "evaluateSgemm : nbElement = "<<nbElement<<", cyclePerElement = " << cyclePerElement << " cy/el, elapsedTime = " << elapsedTime << " cy" << endl; cerr << nbElement << "\t" << cyclePerElement << "\t" << elapsedTime << endl; asterics_free(matOut); asterics_free(matY); asterics_free(matX); } int main(int argc, char** argv){ cout << "SGEMM Intrinsics" << endl; evaluateSgemm(16lu, 100000lu); evaluateSgemm(24lu, 100000lu); evaluateSgemm(32lu, 100000lu); evaluateSgemm(56lu, 10000lu); evaluateSgemm(80lu, 10000lu); evaluateSgemm(104lu, 10000lu); return 0; } |
Previous The sgemm_intrinsics.cpp file |
Parent Intrinsics implementation |
Outline | Next The CMakeLists.txt file |