10.7.4.2 : Sgemm with our intrinsics implementation

Now, let's write the sgemmIntrinsicsPitchPython.py file to test our implementation :

We also need to import several packages:
1
2
3
import sys
import astericshpc
import sgemmpython
The function to allocate and initialise the matrices:
1
2
3
4
5
6
def allocInitMatrix(nbElement):
	'''
		Allocate a square matrix and fill it with a deterministic pattern.
		The matrix comes from astericshpc.allocMatrix(nbElement, nbElement).
		The cell at (row, col) receives float(((row*nbElement + col)*32) % 17),
		so every stored value lies between 0.0 and 16.0.
		Parameters:
			nbElement : number of rows and number of columns of the matrix
		Return:
			the initialised matrix
	'''
	matrix = astericshpc.allocMatrix(nbElement, nbElement)
	for row in range(nbElement):
		# Linear index of the first cell of this row
		rowOffset = row*nbElement
		for col in range(nbElement):
			linearIndex = rowOffset + col
			matrix[row][col] = float(linearIndex*32 % 17)
	return matrix
The function to evaluate performance is built the same way as the C++ one:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
def getTimeFunctionSize(nbRepetition, nbElement):
	'''
		Measure the mean time of sgemmpython.sgemm on square matrices.
		Two input matrices are built with allocInitMatrix and a third one is
		allocated with astericshpc.allocMatrix to be passed as first argument
		of sgemm. The call is repeated nbRepetition times between two
		astericshpc.rdtsc() readings.
		A human readable line is printed on stdout and a tab separated line
		(nbElement, time per element, time per call) is printed on stderr.
		Parameters:
			nbRepetition : number of calls to sgemm to average over
			nbElement : number of rows and columns of each matrix
	'''
	matLeft = allocInitMatrix(nbElement)
	matRight = allocInitMatrix(nbElement)
	matOut = astericshpc.allocMatrix(nbElement, nbElement)

	startTick = astericshpc.rdtsc()
	for _ in range(nbRepetition):
		sgemmpython.sgemm(matOut, matLeft, matRight)
	stopTick = astericshpc.rdtsc()

	# Mean counter difference for one call, then per cell of the square matrix
	nbCell = nbElement*nbElement
	elapsedTime = float(stopTick - startTick)/float(nbRepetition)
	elapsedTimePerElement = elapsedTime/float(nbCell)
	print("nbElement =", nbElement, ", elapsedTimePerElement =", elapsedTimePerElement, "cy/el", ", elapsedTime =", elapsedTime, "cy")
	# Same three values, tab separated, on stderr
	print(nbElement, elapsedTimePerElement, elapsedTime, sep="\t", file=sys.stderr)
Then, we have a function that produces one measurement point for each size in a list of sizes:
1
2
3
def makeElapsedTimeValue(listSize, nbRepetition):
	'''
		Run getTimeFunctionSize once for every size of listSize, in order.
		Parameters:
			listSize : iterable of matrix sizes to be evaluated
			nbRepetition : number of repetitions forwarded to getTimeFunctionSize
	'''
	for nbElement in listSize:
		getTimeFunctionSize(nbRepetition, nbElement)
Finally, we run the performance tests only if this script is executed as the main file, not when it is imported by another file:
1
2
3
4
5
6
7
8
9
10
11
if __name__ == "__main__":
	# Matrix sizes to evaluate (rows and columns of the square matrices)
	listSize = [10, 16, 24, 32, 40, 56, 80, 90, 104]
	# Each size is evaluated with 100000 repetitions
	makeElapsedTimeValue(listSize, 100000)
The full sgemmIntrinsicsPitchPython.py file :
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
'''
	Auteur : Pierre Aubert
	Mail : aubertp7@gmail.com
	Licence : CeCILL-C
'''

import sys
import astericshpc
import sgemmpython

def allocInitMatrix(nbElement):
	'''
		Allocate a square matrix and fill it with a deterministic pattern.
		The matrix comes from astericshpc.allocMatrix(nbElement, nbElement).
		The cell at (row, col) receives float(((row*nbElement + col)*32) % 17),
		so every stored value lies between 0.0 and 16.0.
		Parameters:
			nbElement : number of rows and number of columns of the matrix
		Return:
			the initialised matrix
	'''
	matrix = astericshpc.allocMatrix(nbElement, nbElement)
	for row in range(nbElement):
		# Linear index of the first cell of this row
		rowOffset = row*nbElement
		for col in range(nbElement):
			linearIndex = rowOffset + col
			matrix[row][col] = float(linearIndex*32 % 17)
	return matrix

def getTimeFunctionSize(nbRepetition, nbElement):
	'''
		Measure the mean time of sgemmpython.sgemm on square matrices.
		Two input matrices are built with allocInitMatrix and a third one is
		allocated with astericshpc.allocMatrix to be passed as first argument
		of sgemm. The call is repeated nbRepetition times between two
		astericshpc.rdtsc() readings.
		A human readable line is printed on stdout and a tab separated line
		(nbElement, time per element, time per call) is printed on stderr.
		Parameters:
			nbRepetition : number of calls to sgemm to average over
			nbElement : number of rows and columns of each matrix
	'''
	matLeft = allocInitMatrix(nbElement)
	matRight = allocInitMatrix(nbElement)
	matOut = astericshpc.allocMatrix(nbElement, nbElement)

	startTick = astericshpc.rdtsc()
	for _ in range(nbRepetition):
		sgemmpython.sgemm(matOut, matLeft, matRight)
	stopTick = astericshpc.rdtsc()

	# Mean counter difference for one call, then per cell of the square matrix
	nbCell = nbElement*nbElement
	elapsedTime = float(stopTick - startTick)/float(nbRepetition)
	elapsedTimePerElement = elapsedTime/float(nbCell)
	print("nbElement =", nbElement, ", elapsedTimePerElement =", elapsedTimePerElement, "cy/el", ", elapsedTime =", elapsedTime, "cy")
	# Same three values, tab separated, on stderr
	print(nbElement, elapsedTimePerElement, elapsedTime, sep="\t", file=sys.stderr)

def makeElapsedTimeValue(listSize, nbRepetition):
	'''
		Run getTimeFunctionSize once for every size of listSize, in order.
		Parameters:
			listSize : iterable of matrix sizes to be evaluated
			nbRepetition : number of repetitions forwarded to getTimeFunctionSize
	'''
	for nbElement in listSize:
		getTimeFunctionSize(nbRepetition, nbElement)

if __name__ == "__main__":
	# Matrix sizes to evaluate (rows and columns of the square matrices)
	listSize = [10, 16, 24, 32, 40, 56, 80, 90, 104]
	# Each size is evaluated with 100000 repetitions
	makeElapsedTimeValue(listSize, 100000)
You can download it here.