6.1.4 : CMakeLists.txt

Écrivons le fichier CMakeLists.txt :



On commence par définir le projet :
1
2
# cmake_minimum_required() must be called before project(): it establishes the
# policy defaults that project() relies on when it configures the toolchain.
cmake_minimum_required(VERSION 3.0)
project(PERFORMANCE_WITH_NAN)
Ensuite, on inclut les macros que l'on a déjà développées pour le produit de Hadamard :
1
# Pull in the performance-test helpers stored with the Hadamard product example.
# Presumably this is where multiplePerfTestLogX() and friends come from - verify.
include("${CMAKE_SOURCE_DIR}/Examples/1-HadamardProduct/multiplePerfTest.cmake")
On définit les différentes tailles de vecteurs qui seront les différents points de nos graphes (attention à avoir des multiples de 8 pour que les tests avec les fonctions intrinsèques se passent bien) :
1
# Problem sizes handed to every test below, kept as one comma-separated string.
# All values are multiples of 8.
set(CONFIG_SGEMM
    "16, 32, 64, 112"
)
Définissons ce qu'est un NaN (signalant, « signaling NaN ») :
1
# C++ expression for a signaling NaN of type float, passed as text to the
# value-based tests.
set(NAN_DEF
    "std::numeric_limits<float>::signaling_NaN()"
)
Définissons ce qu'est un infini :
1
# C++ expression for positive infinity of type float, passed as text to the
# value-based tests.
set(INF_DEF
    "std::numeric_limits<float>::infinity()"
)
Définissons un nombre dénormalisé :
1
# C++ expression for the smallest positive denormalized (subnormal) float,
# passed as text to the value-based tests.
set(DENORM_DEF
    "std::numeric_limits<float>::denorm_min()"
)
Testons avec le plus petit nombre normalisé positif (si on le multiplie par lui-même, il sera dénormalisé) :
1
# C++ expression for the smallest positive normalized float. The tests using it
# are meant to make the computation itself leave the normalized range.
set(MIN_TO_DENORM_DEF
    "std::numeric_limits<float>::min()"
)
Voici les quatre listes de sources de base pour tous les tests de performances de cette partie :
1
2
3
4
# Source lists: each one pairs a single sgemm implementation with the common
# driver main_sgemm.cpp.
set(SGEMM_BASE_SRC
    sgemm_base.cpp
    main_sgemm.cpp
)
set(SGEMM_SWAP_SRC
    sgemm_swap.cpp
    main_sgemm.cpp
)
set(SGEMM_VECTORIZE_SRC
    sgemm_vectorize.cpp
    main_sgemm.cpp
)
set(SGEMM_INTRINSICS_SRC
    sgemm_intrinsics.cpp
    main_sgemm.cpp
)
Créons les tests de référence :
1
2
3
4
# Reference tests, one per sgemm implementation.
# Arguments as used here: name, two identifier words, extra compiler options,
# problem sizes, sources. Exact meaning is defined by multiplePerfTestLogX()
# in the included module - confirm there.
# VECTORIZED_OPTION is not set in this file; presumably it comes from the
# included module or a parent scope - verify.
multiplePerfTestLogX(
    "sgemmBase" sgemm base
    ""
    "${CONFIG_SGEMM}"
    ${SGEMM_BASE_SRC}
)

multiplePerfTestLogX(
    "sgemmSwap" sgemm swap
    ""
    "${CONFIG_SGEMM}"
    ${SGEMM_SWAP_SRC}
)

multiplePerfTestLogX(
    "sgemmVectorize" sgemm vectorize
    "${VECTORIZED_OPTION}"
    "${CONFIG_SGEMM}"
    ${SGEMM_VECTORIZE_SRC}
)

multiplePerfTestLogX(
    "sgemmIntrinsics" sgemm intrinsics
    "${VECTORIZED_OPTION}"
    "${CONFIG_SGEMM}"
    ${SGEMM_INTRINSICS_SRC}
)
Nous pouvons également ajouter des graphes (mais il faut faire attention aux noms des programmes que l'on compare) :
1
2
# Comparison plots: the first argument is the plot name, the rest are the
# programs to compare (their names must match the generated test programs).
# NOTE(review): "03" and "0fast" are written with the digit zero, not the
# letter O - confirm this is intended before renaming.
phoenix_plotPerfLogX(
    "cmpSgemmSafe03"
    sgemm_base_O3
    sgemm_swap_O3
    sgemm_vectorize_O3
    sgemm_intrinsics_O3
)
phoenix_plotPerfLogX(
    "cmpSgemmSafe0fast"
    sgemm_base_Ofast
    sgemm_swap_Ofast
    sgemm_vectorize_Ofast
    sgemm_intrinsics_Ofast
)
Testons avec des NaN :
1
2
3
4
5
6
7
8
9
10
# NaN tests: each sgemm implementation is built at -O3 and given NAN_DEF as the
# value expression.
# Arguments as used here: name, two identifier words, value label, optimisation
# flag, value expression, extra compiler options, problem sizes, sources.
# Exact meaning is defined by multiplePerfTestValueLogX() - confirm there.
multiplePerfTestValueLogX(
    "sgemmBaseNanO3" sgemm base nan -O3
    "${NAN_DEF}"
    ""
    "${CONFIG_SGEMM}"
    ${SGEMM_BASE_SRC}
)

multiplePerfTestValueLogX(
    "sgemmSwapNanO3" sgemm swap nan -O3
    "${NAN_DEF}"
    ""
    "${CONFIG_SGEMM}"
    ${SGEMM_SWAP_SRC}
)

multiplePerfTestValueLogX(
    "sgemmVectorizeNanO3" sgemm vectorize nan -O3
    "${NAN_DEF}"
    "${VECTORIZED_OPTION}"
    "${CONFIG_SGEMM}"
    ${SGEMM_VECTORIZE_SRC}
)

multiplePerfTestValueLogX(
    "sgemmIntrinsicsNanO3" sgemm intrinsics nan -O3
    "${NAN_DEF}"
    "${VECTORIZED_OPTION}"
    "${CONFIG_SGEMM}"
    ${SGEMM_INTRINSICS_SRC}
)
Testons avec des Inf :
1
2
3
4
5
6
7
8
9
10
# Infinity tests: each sgemm implementation is built at -O3 and given INF_DEF
# as the value expression.
# Arguments as used here: name, two identifier words, value label, optimisation
# flag, value expression, extra compiler options, problem sizes, sources.
# Exact meaning is defined by multiplePerfTestValueLogX() - confirm there.
multiplePerfTestValueLogX(
    "sgemmBaseInfO3" sgemm base inf -O3
    "${INF_DEF}"
    ""
    "${CONFIG_SGEMM}"
    ${SGEMM_BASE_SRC}
)

multiplePerfTestValueLogX(
    "sgemmSwapInfO3" sgemm swap inf -O3
    "${INF_DEF}"
    ""
    "${CONFIG_SGEMM}"
    ${SGEMM_SWAP_SRC}
)

multiplePerfTestValueLogX(
    "sgemmVectorizeInfO3" sgemm vectorize inf -O3
    "${INF_DEF}"
    "${VECTORIZED_OPTION}"
    "${CONFIG_SGEMM}"
    ${SGEMM_VECTORIZE_SRC}
)

multiplePerfTestValueLogX(
    "sgemmIntrinsicsInfO3" sgemm intrinsics inf -O3
    "${INF_DEF}"
    "${VECTORIZED_OPTION}"
    "${CONFIG_SGEMM}"
    ${SGEMM_INTRINSICS_SRC}
)
Testons avec des nombres dénormalisés :
1
2
3
4
5
6
7
8
9
10
# Denormalized-input tests: each sgemm implementation is built at -O3 and given
# DENORM_DEF as the value expression.
# Arguments as used here: name, two identifier words, value label, optimisation
# flag, value expression, extra compiler options, problem sizes, sources.
# Exact meaning is defined by multiplePerfTestValueLogX() - confirm there.
multiplePerfTestValueLogX("sgemmBaseDenormO3" sgemm base denorm -O3 "${DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapDenormO3" sgemm swap denorm -O3 "${DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeDenormO3" sgemm vectorize denorm -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

# Name spelled "Intrinsics", matching the NaN and Inf tests (was "Intrinics").
multiplePerfTestValueLogX("sgemmIntrinsicsDenormO3" sgemm intrinsics denorm -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})
Essayons de régler le problème avec des nombres dénormalisés :
1
2
3
4
5
6
7
8
9
10
# Same denormalized-input tests, built with extra options intended to work
# around the denormal slowdown.
# Arguments as used here: name, two identifier words, value label, optimisation
# flag, value expression, extra compiler options, problem sizes, sources.
# Exact meaning is defined by multiplePerfTestValueLogX() - confirm there.
# NOTE(review): the base and swap variants only receive "-mfpmath=sse", while
# the vectorize and intrinsics variants also receive DENORM_DAZ_OPTION.
# DENORM_DAZ_OPTION is not set in this file - confirm where it is defined and
# whether base/swap should use it as well.
multiplePerfTestValueLogX("sgemmBaseDenormDazO3" sgemm base denormDaz -O3 "${DENORM_DEF}"
    "-mfpmath=sse" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapDenormDazO3" sgemm swap denormDaz -O3 "${DENORM_DEF}"
    "-mfpmath=sse" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeDenormDazO3" sgemm vectorize denormDaz -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

# Name spelled "Intrinsics", matching the NaN and Inf tests (was "Intrinics").
multiplePerfTestValueLogX("sgemmIntrinsicsDenormDazO3" sgemm intrinsics denormDaz -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})
Voyons ce qu'il se passe lorsque le calcul produit des nombres dénormalisés :
1
2
3
4
5
6
7
8
9
10
# Tests where the input value is MIN_TO_DENORM_DEF (smallest normalized float),
# so that the computation itself is meant to leave the normalized range.
# Arguments as used here: name, two identifier words, value label, optimisation
# flag, value expression, extra compiler options, problem sizes, sources.
# Exact meaning is defined by multiplePerfTestValueLogX() - confirm there.
multiplePerfTestValueLogX("sgemmBaseMakeDenormO3" sgemm base make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapMakeDenormO3" sgemm swap make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeMakeDenormO3" sgemm vectorize make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

# Name spelled "Intrinsics", matching the NaN and Inf tests (was "Intrinics").
multiplePerfTestValueLogX("sgemmIntrinsicsMakeDenormO3" sgemm intrinsics make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})


Le fichier CMakeLists.txt complet :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Performance tests for sgemm with special floating-point values
# (NaN, infinity, denormalized numbers).

# cmake_minimum_required() must be called before project(): it establishes the
# policy defaults that project() relies on when it configures the toolchain.
cmake_minimum_required(VERSION 3.0)
project(PERFORMANCE_WITH_NAN)

# Performance-test helpers stored with the Hadamard product example.
# Presumably this is where multiplePerfTestLogX(), multiplePerfTestValueLogX()
# and phoenix_plotPerfLogX() come from - verify.
include(${CMAKE_SOURCE_DIR}/Examples/1-HadamardProduct/multiplePerfTest.cmake)

# Problem sizes handed to every test, as one comma-separated string.
# All values are multiples of 8.
set(CONFIG_SGEMM "16, 32, 64, 112")

# C++ expressions (passed as text) for the special values under test.
# Signaling NaN:
set(NAN_DEF "std::numeric_limits<float>::signaling_NaN()")

# Positive infinity:
set(INF_DEF "std::numeric_limits<float>::infinity()")

# Smallest positive denormalized (subnormal) float:
set(DENORM_DEF "std::numeric_limits<float>::denorm_min()")

# Smallest positive normalized float; used so that the computation itself is
# meant to leave the normalized range:
set(MIN_TO_DENORM_DEF "std::numeric_limits<float>::min()")

# Source lists: one sgemm implementation plus the common driver main_sgemm.cpp.
set(SGEMM_BASE_SRC sgemm_base.cpp main_sgemm.cpp)
set(SGEMM_SWAP_SRC sgemm_swap.cpp main_sgemm.cpp)
set(SGEMM_VECTORIZE_SRC sgemm_vectorize.cpp main_sgemm.cpp)
set(SGEMM_INTRINSICS_SRC sgemm_intrinsics.cpp main_sgemm.cpp)

# Reference tests (no special value injected).
# VECTORIZED_OPTION and DENORM_DAZ_OPTION are not set in this file; presumably
# they come from the included module or a parent scope - verify.
multiplePerfTestLogX("sgemmBase" sgemm base "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestLogX("sgemmSwap" sgemm swap "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})
multiplePerfTestLogX("sgemmVectorize" sgemm vectorize "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})
multiplePerfTestLogX("sgemmIntrinsics" sgemm intrinsics "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# Comparison plots; the program names must match the generated test programs.
# NOTE(review): "03" and "0fast" use the digit zero, not the letter O - confirm
# this is intended before renaming.
phoenix_plotPerfLogX("cmpSgemmSafe03" sgemm_base_O3 sgemm_swap_O3 sgemm_vectorize_O3 sgemm_intrinsics_O3)
phoenix_plotPerfLogX("cmpSgemmSafe0fast" sgemm_base_Ofast sgemm_swap_Ofast sgemm_vectorize_Ofast sgemm_intrinsics_Ofast)

# Value-based tests. Arguments as used here: name, two identifier words, value
# label, optimisation flag, value expression, extra compiler options, problem
# sizes, sources. Exact meaning is defined by multiplePerfTestValueLogX() -
# confirm there.

# --- NaN ---
multiplePerfTestValueLogX("sgemmBaseNanO3" sgemm base nan -O3 "${NAN_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapNanO3" sgemm swap nan -O3 "${NAN_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeNanO3" sgemm vectorize nan -O3 "${NAN_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

multiplePerfTestValueLogX("sgemmIntrinsicsNanO3" sgemm intrinsics nan -O3  "${NAN_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# --- Infinity ---
multiplePerfTestValueLogX("sgemmBaseInfO3" sgemm base inf -O3 "${INF_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapInfO3" sgemm swap inf -O3 "${INF_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeInfO3" sgemm vectorize inf -O3 "${INF_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

multiplePerfTestValueLogX("sgemmIntrinsicsInfO3" sgemm intrinsics inf -O3  "${INF_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# --- Denormalized inputs ---
multiplePerfTestValueLogX("sgemmBaseDenormO3" sgemm base denorm -O3 "${DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapDenormO3" sgemm swap denorm -O3 "${DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeDenormO3" sgemm vectorize denorm -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

# Name spelled "Intrinsics", matching the NaN and Inf tests (was "Intrinics").
multiplePerfTestValueLogX("sgemmIntrinsicsDenormO3" sgemm intrinsics denorm -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# --- Denormalized inputs, with options intended to avoid the slowdown ---
# NOTE(review): base and swap only receive "-mfpmath=sse", while vectorize and
# intrinsics also receive DENORM_DAZ_OPTION - confirm this is intended.
multiplePerfTestValueLogX("sgemmBaseDenormDazO3" sgemm base denormDaz -O3 "${DENORM_DEF}"
    "-mfpmath=sse" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapDenormDazO3" sgemm swap denormDaz -O3 "${DENORM_DEF}"
    "-mfpmath=sse" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeDenormDazO3" sgemm vectorize denormDaz -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

# Name spelled "Intrinsics", matching the NaN and Inf tests (was "Intrinics").
multiplePerfTestValueLogX("sgemmIntrinsicsDenormDazO3" sgemm intrinsics denormDaz -O3 "${DENORM_DEF}"
    "${VECTORIZED_OPTION} ${DENORM_DAZ_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})

# --- Computation meant to produce denormalized numbers ---
multiplePerfTestValueLogX("sgemmBaseMakeDenormO3" sgemm base make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_BASE_SRC})
multiplePerfTestValueLogX("sgemmSwapMakeDenormO3" sgemm swap make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "" "${CONFIG_SGEMM}" ${SGEMM_SWAP_SRC})

multiplePerfTestValueLogX("sgemmVectorizeMakeDenormO3" sgemm vectorize make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_VECTORIZE_SRC})

# Name spelled "Intrinsics", matching the NaN and Inf tests (was "Intrinics").
multiplePerfTestValueLogX("sgemmIntrinsicsMakeDenormO3" sgemm intrinsics make_denorm -O3 "${MIN_TO_DENORM_DEF}"
    "${VECTORIZED_OPTION}" "${CONFIG_SGEMM}" ${SGEMM_INTRINSICS_SRC})


Vous pouvez le télécharger ici.

Il n'a fallu que quelques lignes pour créer tous les tests dont nous avons besoin.