8
8
#include < cstdint>
9
9
#include < cstring>
10
10
#include < iostream>
11
+ enum USM_TEST_RES { USM_ALLOC_FAIL = -1 , USM_TEST_PASS = 0 , USM_TEST_FAIL = 1 };
11
12
12
- class KernelTestMemcpy ;
13
+ template <class DeviceMemcpyTest >
14
+ void device_memcpy_invoke (sycl::queue &deviceQueue, uint8_t *dest,
15
+ const uint8_t *src, size_t n) {
16
+ deviceQueue
17
+ .submit ([&](sycl::handler &cgh) {
18
+ cgh.single_task <DeviceMemcpyTest>([=]() { memcpy (dest, src, n); });
19
+ })
20
+ .wait ();
21
+ }
13
22
23
+ class KernelTestMemcpy ;
14
24
bool kernel_test_memcpy (sycl::queue &deviceQueue) {
15
25
bool success = true ;
16
26
char src[20 ] = " abcdefg012345xyzvvv" ;
17
- char dst[20 ];
27
+ char dst[20 ] = {
28
+ 0 ,
29
+ };
18
30
{
19
31
sycl::buffer<char , 1 > buffer1 (src, sycl::range<1 >(20 ));
20
32
sycl::buffer<char , 1 > buffer2 (dst, sycl::range<1 >(20 ));
@@ -36,6 +48,101 @@ bool kernel_test_memcpy(sycl::queue &deviceQueue) {
36
48
return success;
37
49
}
38
50
51
+ class KernelTestMemcpyInit ;
52
+ class KernelTestMemcpyUSM0 ;
53
+ class KernelTestMemcpyUSM1 ;
54
+ class KernelTestMemcpyUSM2 ;
55
+ class KernelTestMemcpyUSM3 ;
56
+ class KernelTestMemcpyUSM4 ;
57
+ USM_TEST_RES kernel_test_memcpy_usm (sycl::queue &deviceQueue) {
58
+ sycl::device dev = deviceQueue.get_device ();
59
+ sycl::context ctxt = deviceQueue.get_context ();
60
+ uint8_t *usm_shared_dest =
61
+ (uint8_t *)sycl::aligned_alloc_shared (alignof (uint32_t ), 32 , dev, ctxt);
62
+ uint8_t *usm_shared_src =
63
+ (uint8_t *)sycl::aligned_alloc_shared (alignof (uint32_t ), 32 , dev, ctxt);
64
+ if (usm_shared_dest == nullptr || usm_shared_src == nullptr )
65
+ return USM_ALLOC_FAIL;
66
+ // Init src usm memory
67
+ char *host_init_str = " abcdefghijklmnopqrstuvwxyz" ;
68
+ size_t str_len = strlen (host_init_str);
69
+ deviceQueue
70
+ .submit ([&](sycl::handler &cgh) {
71
+ cgh.single_task <class KernelTestMemcpyInit >([=]() {
72
+ char c = ' a' ;
73
+ for (size_t idx = 0 ; idx < 32 ; ++idx)
74
+ usm_shared_src[idx] = c++;
75
+ });
76
+ })
77
+ .wait ();
78
+ int usm_memcheck_pass = 0 ;
79
+ // Memcpy 3 bytest from aligned src to aligned dest
80
+ device_memcpy_invoke<KernelTestMemcpyUSM0>(deviceQueue, usm_shared_dest,
81
+ usm_shared_src, 3 );
82
+ usm_memcheck_pass = memcmp (usm_shared_dest, usm_shared_src, 3 );
83
+ if (usm_memcheck_pass != 0 ) {
84
+ sycl::free (usm_shared_src, ctxt);
85
+ sycl::free (usm_shared_dest, ctxt);
86
+ return USM_TEST_FAIL;
87
+ }
88
+
89
+ // Memcpy 15 bytest from aligned src to aligned dest
90
+ device_memcpy_invoke<KernelTestMemcpyUSM1>(deviceQueue, usm_shared_dest,
91
+ usm_shared_src, 15 );
92
+ usm_memcheck_pass = memcmp (usm_shared_dest, usm_shared_src, 15 );
93
+ if (usm_memcheck_pass != 0 ) {
94
+ sycl::free (usm_shared_src, ctxt);
95
+ sycl::free (usm_shared_dest, ctxt);
96
+ return USM_TEST_FAIL;
97
+ }
98
+
99
+ deviceQueue
100
+ .submit ([&](sycl::handler &cgh) { cgh.memset (usm_shared_dest, 0 , 32 ); })
101
+ .wait ();
102
+ // Memcpy 1 byte from unaligned src to unaligned dest;
103
+ device_memcpy_invoke<KernelTestMemcpyUSM2>(deviceQueue, usm_shared_dest + 1 ,
104
+ usm_shared_src + 1 , 1 );
105
+ usm_memcheck_pass = memcmp (usm_shared_dest + 1 , usm_shared_src + 1 , 1 );
106
+ if (usm_memcheck_pass != 0 ) {
107
+ sycl::free (usm_shared_src, ctxt);
108
+ sycl::free (usm_shared_dest, ctxt);
109
+ return USM_TEST_FAIL;
110
+ }
111
+
112
+ // Memcpy 12 bytes from unaligned src to unalinged dest;
113
+ device_memcpy_invoke<KernelTestMemcpyUSM3>(deviceQueue, usm_shared_dest + 3 ,
114
+ usm_shared_src + 3 , 12 );
115
+ usm_memcheck_pass = memcmp (usm_shared_dest + 3 , usm_shared_src + 3 , 12 );
116
+ if (usm_memcheck_pass != 0 ) {
117
+ sycl::free (usm_shared_src, ctxt);
118
+ sycl::free (usm_shared_dest, ctxt);
119
+ return USM_TEST_FAIL;
120
+ }
121
+
122
+ // Memcpy 7 bytes from unaligned src to unaligned dest
123
+ device_memcpy_invoke<KernelTestMemcpyUSM4>(deviceQueue, usm_shared_dest + 9 ,
124
+ usm_shared_src + 7 , 7 );
125
+ usm_memcheck_pass = memcmp (usm_shared_dest + 9 , usm_shared_src + 7 , 7 );
126
+ if (usm_memcheck_pass != 0 ) {
127
+ sycl::free (usm_shared_src, ctxt);
128
+ sycl::free (usm_shared_dest, ctxt);
129
+ return USM_TEST_FAIL;
130
+ }
131
+ sycl::free (usm_shared_src, ctxt);
132
+ sycl::free (usm_shared_dest, ctxt);
133
+ return USM_TEST_PASS;
134
+ }
135
+
136
+ template <class DeviceMemsetTest >
137
+ void device_memset_invoke (sycl::queue &deviceQueue, uint8_t *dest, int c,
138
+ size_t n) {
139
+ deviceQueue
140
+ .submit ([&](sycl::handler &cgh) {
141
+ cgh.single_task <DeviceMemsetTest>([=]() { memset (dest, c, n); });
142
+ })
143
+ .wait ();
144
+ }
145
+
39
146
class KernelTestMemset ;
40
147
bool kernel_test_memset (sycl::queue &deviceQueue) {
41
148
bool success = true ;
@@ -64,6 +171,71 @@ bool kernel_test_memset(sycl::queue &deviceQueue) {
64
171
return success;
65
172
}
66
173
174
+ class KernelTestMemsetUSM0 ;
175
+ class KernelTestMemsetUSM1 ;
176
+ class KernelTestMemsetUSM2 ;
177
+ class KernelTestMemsetUSM3 ;
178
+
179
+ USM_TEST_RES kernel_test_memset_usm (sycl::queue &deviceQueue) {
180
+ sycl::device dev = deviceQueue.get_device ();
181
+ sycl::context ctxt = deviceQueue.get_context ();
182
+ uint8_t host_ref_buffer[32 ];
183
+ uint8_t *usm_shared_buffer =
184
+ (uint8_t *)sycl::aligned_alloc_shared (alignof (uint32_t ), 32 , dev, ctxt);
185
+ if (usm_shared_buffer == nullptr )
186
+ return USM_ALLOC_FAIL;
187
+
188
+ deviceQueue
189
+ .submit (
190
+ [&](sycl::handler &cgh) { cgh.memset (usm_shared_buffer, 0xFF , 32 ); })
191
+ .wait ();
192
+
193
+ int usm_memcheck_pass = 0 ;
194
+ // memset 17 bytes on aligned address
195
+ device_memset_invoke<KernelTestMemsetUSM0>(deviceQueue, usm_shared_buffer,
196
+ 0xEE , 17 );
197
+ memset (host_ref_buffer, 0xFF , 32 );
198
+ memset (host_ref_buffer, 0xEE , 17 );
199
+ usm_memcheck_pass = memcmp (host_ref_buffer, usm_shared_buffer, 32 );
200
+ if (usm_memcheck_pass != 0 ) {
201
+ sycl::free (usm_shared_buffer, ctxt);
202
+ return USM_TEST_FAIL;
203
+ }
204
+
205
+ // memset 3 bytes on aligned address
206
+ device_memset_invoke<KernelTestMemsetUSM1>(deviceQueue, usm_shared_buffer,
207
+ 0xCC , 3 );
208
+ memset (host_ref_buffer, 0xCC , 3 );
209
+ usm_memcheck_pass = memcmp (host_ref_buffer, usm_shared_buffer, 32 );
210
+ if (usm_memcheck_pass != 0 ) {
211
+ sycl::free (usm_shared_buffer, ctxt);
212
+ return USM_TEST_FAIL;
213
+ }
214
+
215
+ // memset 15 bytes on unaligned address
216
+ device_memset_invoke<KernelTestMemsetUSM2>(deviceQueue, usm_shared_buffer + 1 ,
217
+ 0xAA , 21 );
218
+ memset (host_ref_buffer + 1 , 0xAA , 21 );
219
+ usm_memcheck_pass = memcmp (host_ref_buffer, usm_shared_buffer, 32 );
220
+ if (usm_memcheck_pass != 0 ) {
221
+ sycl::free (usm_shared_buffer, ctxt);
222
+ return USM_TEST_FAIL;
223
+ }
224
+
225
+ // memset 2 bytes on unaligned address
226
+ device_memset_invoke<KernelTestMemsetUSM3>(deviceQueue,
227
+ usm_shared_buffer + 13 , 0xBB , 2 );
228
+ memset (host_ref_buffer + 13 , 0xBB , 2 );
229
+ usm_memcheck_pass = memcmp (host_ref_buffer, usm_shared_buffer, 32 );
230
+ if (usm_memcheck_pass != 0 ) {
231
+ sycl::free (usm_shared_buffer, ctxt);
232
+ return USM_TEST_FAIL;
233
+ }
234
+
235
+ sycl::free (usm_shared_buffer, ctxt);
236
+ return USM_TEST_PASS;
237
+ }
238
+
67
239
class KernelTestMemcmp ;
68
240
bool kernel_test_memcmp (sycl::queue &deviceQueue) {
69
241
bool success = true ;
@@ -262,10 +434,30 @@ bool kernel_test_memcpy_addr_space(sycl::queue &deviceQueue) {
262
434
int main () {
263
435
bool success = true ;
264
436
sycl::queue deviceQueue;
437
+ sycl::device dev = deviceQueue.get_device ();
438
+ bool shared_usm_enabled = false ;
439
+ USM_TEST_RES usm_tres;
440
+ if (dev.get_info <sycl::info::device::usm_shared_allocations>())
441
+ shared_usm_enabled = true ;
265
442
success = kernel_test_memcpy (deviceQueue);
443
+ if (shared_usm_enabled) {
444
+ usm_tres = kernel_test_memcpy_usm (deviceQueue);
445
+ if (usm_tres == USM_ALLOC_FAIL)
446
+ std::cout << " USM shared memory alloc failed, USM tests skipped"
447
+ << std::endl;
448
+ else if (usm_tres == USM_TEST_FAIL)
449
+ success = false ;
450
+ }
266
451
assert (((void )" memcpy test failed!" , success));
267
-
268
452
success = kernel_test_memset (deviceQueue);
453
+ if (shared_usm_enabled) {
454
+ usm_tres = kernel_test_memset_usm (deviceQueue);
455
+ if (usm_tres == USM_ALLOC_FAIL)
456
+ std::cout << " USM shared memory alloc failed, USM tests skipped"
457
+ << std::endl;
458
+ else if (usm_tres == USM_TEST_FAIL)
459
+ success = false ;
460
+ }
269
461
assert (((void )" memset test failed!" , success));
270
462
271
463
success = kernel_test_memcmp (deviceQueue);
0 commit comments