13
13
if not skip_tests :
14
14
try :
15
15
import cupy
16
+
16
17
skip_tests = False
17
18
except ImportError :
18
19
skip_tests = True
19
20
20
21
from .kernels import kernel_string
21
22
23
+
22
24
def launch(kernel, args=()):
    """Invoke ``kernel`` once with a ``(1,)`` grid and a ``(1,)`` block.

    Args:
        kernel: Callable invoked as ``kernel((1,), (1,), args)``. The tests
            in this file pass functions obtained from
            ``cupy.RawModule.get_function``.
        args: Kernel arguments, forwarded unchanged as the third positional
            argument. Defaults to an empty tuple for parameterless kernels.
    """
    # Smallest possible launch configuration: the benchmarks built on this
    # helper measure launch latency, not the work done by the kernel.
    kernel((1,), (1,), args)
24
26
27
+
25
28
# Measure launch latency with no parameters
26
29
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_empty_kernel(benchmark):
    """Benchmark launching ``empty_kernel`` with no arguments.

    Args:
        benchmark: Benchmark fixture, called as ``benchmark(launch, kernel)``.
            Presumably supplied by pytest-benchmark, given the ``benchmark``
            marker above; confirm against the project's test requirements.
    """
    # Build the module from the shared kernel source and look up the kernel.
    module = cupy.RawModule(code=kernel_string)
    kernel = module.get_function("empty_kernel")

    stream = cupy.cuda.stream.Stream(non_blocking=True)

    # Run the benchmarked launches with ``stream`` as the active stream.
    with stream:
        benchmark(launch, kernel)
    # Wait for the stream to finish before the test returns.
    stream.synchronize()
37
40
41
+
38
42
# Measure launch latency with a single parameter
39
43
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
40
44
@pytest .mark .benchmark (group = "cupy" )
41
45
def test_launch_latency_small_kernel (benchmark ):
42
46
module = cupy .RawModule (code = kernel_string )
43
- kernel = module .get_function (' small_kernel' )
47
+ kernel = module .get_function (" small_kernel" )
44
48
cupy .cuda .set_allocator ()
45
49
arg = cupy .cuda .alloc (ctypes .sizeof (ctypes .c_float ))
46
50
@@ -50,12 +54,13 @@ def test_launch_latency_small_kernel(benchmark):
50
54
benchmark (launch , kernel , (arg ,))
51
55
stream .synchronize ()
52
56
57
+
53
58
# Measure launch latency with many parameters using builtin parameter packing
54
59
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
55
60
@pytest .mark .benchmark (group = "cupy" )
56
61
def test_launch_latency_small_kernel_512_args (benchmark ):
57
62
module = cupy .RawModule (code = kernel_string )
58
- kernel = module .get_function (' small_kernel_512_args' )
63
+ kernel = module .get_function (" small_kernel_512_args" )
59
64
cupy .cuda .set_allocator ()
60
65
61
66
args = []
@@ -69,12 +74,13 @@ def test_launch_latency_small_kernel_512_args(benchmark):
69
74
benchmark (launch , kernel , args )
70
75
stream .synchronize ()
71
76
77
+
72
78
# Measure launch latency with 512 bool parameters using builtin parameter packing
73
79
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
74
80
@pytest .mark .benchmark (group = "cupy" )
75
81
def test_launch_latency_small_kernel_512_bools (benchmark ):
76
82
module = cupy .RawModule (code = kernel_string )
77
- kernel = module .get_function (' small_kernel_512_bools' )
83
+ kernel = module .get_function (" small_kernel_512_bools" )
78
84
cupy .cuda .set_allocator ()
79
85
80
86
args = [True ] * 512
@@ -86,12 +92,13 @@ def test_launch_latency_small_kernel_512_bools(benchmark):
86
92
benchmark (launch , kernel , args )
87
93
stream .synchronize ()
88
94
95
+
89
96
# Measure launch latency with 512 double parameters using builtin parameter packing
90
97
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
91
98
@pytest .mark .benchmark (group = "cupy" )
92
99
def test_launch_latency_small_kernel_512_doubles (benchmark ):
93
100
module = cupy .RawModule (code = kernel_string )
94
- kernel = module .get_function (' small_kernel_512_doubles' )
101
+ kernel = module .get_function (" small_kernel_512_doubles" )
95
102
cupy .cuda .set_allocator ()
96
103
97
104
args = [1.2345 ] * 512
@@ -103,12 +110,13 @@ def test_launch_latency_small_kernel_512_doubles(benchmark):
103
110
benchmark (launch , kernel , args )
104
111
stream .synchronize ()
105
112
113
+
106
114
# Measure launch latency with 512 int parameters using builtin parameter packing
107
115
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
108
116
@pytest .mark .benchmark (group = "cupy" )
109
117
def test_launch_latency_small_kernel_512_ints (benchmark ):
110
118
module = cupy .RawModule (code = kernel_string )
111
- kernel = module .get_function (' small_kernel_512_ints' )
119
+ kernel = module .get_function (" small_kernel_512_ints" )
112
120
cupy .cuda .set_allocator ()
113
121
114
122
args = [123 ] * 512
@@ -120,12 +128,13 @@ def test_launch_latency_small_kernel_512_ints(benchmark):
120
128
benchmark (launch , kernel , args )
121
129
stream .synchronize ()
122
130
131
+
123
132
# Measure launch latency with 512 byte-sized (char) parameters using builtin parameter packing
124
133
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
125
134
@pytest .mark .benchmark (group = "cupy" )
126
135
def test_launch_latency_small_kernel_512_bytes (benchmark ):
127
136
module = cupy .RawModule (code = kernel_string )
128
- kernel = module .get_function (' small_kernel_512_chars' )
137
+ kernel = module .get_function (" small_kernel_512_chars" )
129
138
cupy .cuda .set_allocator ()
130
139
131
140
args = [127 ] * 512
@@ -137,12 +146,13 @@ def test_launch_latency_small_kernel_512_bytes(benchmark):
137
146
benchmark (launch , kernel , args )
138
147
stream .synchronize ()
139
148
149
+
140
150
# Measure launch latency with 512 long long parameters using builtin parameter packing
141
151
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
142
152
@pytest .mark .benchmark (group = "cupy" )
143
153
def test_launch_latency_small_kernel_512_longlongs (benchmark ):
144
154
module = cupy .RawModule (code = kernel_string )
145
- kernel = module .get_function (' small_kernel_512_longlongs' )
155
+ kernel = module .get_function (" small_kernel_512_longlongs" )
146
156
cupy .cuda .set_allocator ()
147
157
148
158
args = [9223372036854775806 ] * 512
@@ -154,12 +164,13 @@ def test_launch_latency_small_kernel_512_longlongs(benchmark):
154
164
benchmark (launch , kernel , args )
155
165
stream .synchronize ()
156
166
167
+
157
168
# Measure launch latency with many parameters using builtin parameter packing
158
169
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
159
170
@pytest .mark .benchmark (group = "cupy" )
160
171
def test_launch_latency_small_kernel_256_args (benchmark ):
161
172
module = cupy .RawModule (code = kernel_string )
162
- kernel = module .get_function (' small_kernel_256_args' )
173
+ kernel = module .get_function (" small_kernel_256_args" )
163
174
cupy .cuda .set_allocator ()
164
175
165
176
args = []
@@ -173,12 +184,13 @@ def test_launch_latency_small_kernel_256_args(benchmark):
173
184
benchmark (launch , kernel , args )
174
185
stream .synchronize ()
175
186
187
+
176
188
# Measure launch latency with many parameters using builtin parameter packing
177
189
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
178
190
@pytest .mark .benchmark (group = "cupy" )
179
191
def test_launch_latency_small_kernel_16_args (benchmark ):
180
192
module = cupy .RawModule (code = kernel_string )
181
- kernel = module .get_function (' small_kernel_16_args' )
193
+ kernel = module .get_function (" small_kernel_16_args" )
182
194
cupy .cuda .set_allocator ()
183
195
184
196
args = []
0 commit comments