Skip to content

Commit 6e622a4

Browse files
fineg74bb-sycl
authored andcommitted
[SYCL][ESIMD] Test the fix of error when a scalar offset is provided as a parameter to the API (intel#1534)
1 parent 81333cc commit 6e622a4

16 files changed

+269
-2
lines changed

SYCL/ESIMD/api/svm_gather_scatter.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,11 @@ template <typename T, int N> bool test(queue &Q) {
6161
try {
6262
Q.submit([&](handler &CGH) {
6363
CGH.parallel_for(sycl::range<1>{1}, [=](id<1>) SYCL_ESIMD_KERNEL {
64+
#ifndef USE_SCALAR_OFFSET
6465
simd<Toffset, N> Offsets(0u, sizeof(T));
66+
#else
67+
Toffset Offsets = 0;
68+
#endif
6569
scatter<T, N>(Dst, Offsets, gather<T, N>(Src, Offsets));
6670
});
6771
}).wait();
@@ -72,7 +76,11 @@ template <typename T, int N> bool test(queue &Q) {
7276

7377
unsigned NumErrs = 0;
7478
for (int I = 0; I < N; ++I)
79+
#ifndef USE_SCALAR_OFFSET
7580
if (Dst[I] != Src[I])
81+
#else
82+
if ((Dst[I] != Src[I] && I == 0) || (I != 0 && Dst[I] != 0))
83+
#endif
7684
if (++NumErrs <= 10)
7785
std::cout << "failed at " << I << ": " << Dst[I]
7886
<< " (Dst) != " << Src[I] << " (Src)\n";
@@ -90,48 +98,59 @@ int main(void) {
9098
bool Pass = true;
9199

92100
Pass &= test<int8_t, 1>(Q);
101+
#ifndef USE_SCALAR_OFFSET
93102
Pass &= test<int8_t, 2>(Q);
94103
Pass &= test<int8_t, 4>(Q);
95104
Pass &= test<int8_t, 8>(Q);
96105
Pass &= test<int8_t, 16>(Q);
97106
Pass &= test<int8_t, 32>(Q);
107+
#endif
98108

99109
Pass &= test<int16_t, 1>(Q);
110+
#ifndef USE_SCALAR_OFFSET
100111
Pass &= test<int16_t, 2>(Q);
101112
Pass &= test<int16_t, 4>(Q);
102113
Pass &= test<int16_t, 8>(Q);
103114
Pass &= test<int16_t, 16>(Q);
104115
Pass &= test<int16_t, 32>(Q);
116+
#endif
105117

106118
Pass &= test<int32_t, 1>(Q);
119+
#ifndef USE_SCALAR_OFFSET
107120
Pass &= test<int32_t, 2>(Q);
108121
Pass &= test<int32_t, 4>(Q);
109122
Pass &= test<int32_t, 8>(Q);
110123
Pass &= test<int32_t, 16>(Q);
111124
Pass &= test<int32_t, 32>(Q);
112-
125+
#endif
113126
if (Dev.has(aspect::fp16)) {
114127
Pass &= test<half, 1>(Q);
128+
#ifndef USE_SCALAR_OFFSET
115129
Pass &= test<half, 2>(Q);
116130
Pass &= test<half, 4>(Q);
117131
Pass &= test<half, 8>(Q);
118132
Pass &= test<half, 16>(Q);
119133
Pass &= test<half, 32>(Q);
134+
#endif
120135
}
121136

122137
Pass &= test<bfloat16, 1>(Q);
138+
#ifndef USE_SCALAR_OFFSET
123139
Pass &= test<bfloat16, 2>(Q);
124140
Pass &= test<bfloat16, 4>(Q);
125141
Pass &= test<bfloat16, 8>(Q);
126142
Pass &= test<bfloat16, 16>(Q);
127143
Pass &= test<bfloat16, 32>(Q);
144+
#endif
128145
#ifdef USE_TF32
129146
Pass &= test<tfloat32, 1>(Q);
147+
#ifndef USE_SCALAR_OFFSET
130148
Pass &= test<tfloat32, 2>(Q);
131149
Pass &= test<tfloat32, 4>(Q);
132150
Pass &= test<tfloat32, 8>(Q);
133151
Pass &= test<tfloat32, 16>(Q);
134152
Pass &= test<tfloat32, 32>(Q);
153+
#endif
135154
#endif
136155

137156
std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//==------ svm_gather_scatter_scalar_off.cpp - DPC++ ESIMD on-device test -==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu && !gpu-intel-pvc
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
13+
// Regression test for gather/scatter API.
14+
// scalar offset variant of the test - uses scalar offset.
15+
16+
#define USE_SCALAR_OFFSET
17+
18+
#include "svm_gather_scatter.cpp"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//==-- dword_atomic_cmpxchg_scalar_off.cpp - DPC++ ESIMD on-device test -==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This test checks LSC atomic operations.
9+
//===----------------------------------------------------------------------===//
10+
// REQUIRES: gpu
11+
// UNSUPPORTED: cuda || hip
12+
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
13+
// UNSUPPORTED: esimd_emulator
14+
// RUN: %clangxx -fsycl %s -o %t.out
15+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
16+
17+
// This macro enables only cmpxch tests. They may require more time to execute,
18+
// and have higher probablity to hit kernel execution time limit, so they are
19+
// separated.
20+
#define CMPXCHG_TEST
21+
22+
// This macro enforces usage of dword atomics in the included test.
23+
#define USE_DWORD_ATOMICS
24+
// This macro enforces scalar offset variant of the test - uses scalar offsets.
25+
#define USE_SCALAR_OFFSET
26+
27+
#include "lsc/atomic_smoke.cpp"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//==--- dword_atomic_smoke_scalar_off.cpp - DPC++ ESIMD on-device test ---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This test checks LSC atomic operations.
9+
//===----------------------------------------------------------------------===//
10+
// REQUIRES: gpu
11+
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
12+
// UNSUPPORTED: esimd_emulator
13+
// RUN: %clangxx -fsycl %s -o %t.out
14+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
15+
//
16+
// scalar offset variant of the test - uses scalar offsets.
17+
18+
#define USE_DWORD_ATOMICS
19+
#define USE_SCALAR_OFFSET
20+
21+
#include "lsc/atomic_smoke.cpp"

SYCL/ESIMD/lsc/Inputs/lsc_usm_load.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,11 @@ bool test(uint32_t pmask = 0xffffffff) {
9595
lsc_block_store<T, VS, lsc_data_size::default_size>(
9696
out + elem_off, vals);
9797
} else {
98+
#ifndef USE_SCALAR_OFFSET
9899
simd<Toffset, VL> offset(byte_off, VS * sizeof(T));
100+
#else
101+
Toffset offset = byte_off;
102+
#endif
99103
simd_mask<VL> pred;
100104
for (int i = 0; i < VL; i++)
101105
pred.template select<1, 1>(i) = (pmask >> i) & 1;
@@ -142,8 +146,17 @@ bool test(uint32_t pmask = 0xffffffff) {
142146
for (int i = 0; i < Size; i++) {
143147
Tuint in_val = sycl::bit_cast<Tuint>(in[i]);
144148
Tuint out_val = sycl::bit_cast<Tuint>(out[i]);
149+
#ifndef USE_SCALAR_OFFSET
145150
Tuint e = (pmask >> ((i / VS) % VL)) & 1 ? in_val & vmask
146151
: sycl::bit_cast<Tuint>(old_val);
152+
#else
153+
// Calculate the mask to identify the areas that were actually updated
154+
constexpr uint16_t mask =
155+
1U << ((sycl::bit_cast<uint32_t>((float)VL) >> 23) - 126);
156+
Tuint e = ((i / VS) % VL == 0) && (pmask >> ((i / VS) % VL)) & (mask - 1)
157+
? in_val & vmask
158+
: sycl::bit_cast<Tuint>(old_val);
159+
#endif
147160
if (out_val != e) {
148161
passed = false;
149162
std::cout << "out[" << i << "] = 0x" << std::hex << out_val
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//==---- atomic_cmpxchg_scalar_off.cpp - DPC++ ESIMD on-device test ----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This test checks LSC compare-and-exchange atomic operations.
9+
//===----------------------------------------------------------------------===//
10+
// REQUIRES: gpu-intel-pvc
11+
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
12+
// UNSUPPORTED: esimd_emulator
13+
// RUN: %clangxx -fsycl %s -o %t.out
14+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
15+
16+
// This macro enables only cmpxch tests. They may require more time to execute,
17+
// and have higher probablity to hit kernel execution time limit, so they are
18+
// separated.
19+
#define CMPXCHG_TEST
20+
// This macro enforces scalar offset variant of the test - uses scalar offsets.
21+
#define USE_SCALAR_OFFSET
22+
23+
#include "atomic_smoke.cpp"

SYCL/ESIMD/lsc/atomic_smoke.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010
// REQUIRES: gpu-intel-pvc
1111
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
12+
// TODO: esimd_emulator doesn't support xchg operation
1213
// UNSUPPORTED: esimd_emulator
1314
// RUN: %clangxx -fsycl %s -o %t.out
1415
// RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -216,8 +217,12 @@ bool test(queue q, const Config &cfg) {
216217
cgh.parallel_for<TestID<T, N, ImplF>>(
217218
rng, [=](id<1> ii) SYCL_ESIMD_KERNEL {
218219
int i = ii;
220+
#ifndef USE_SCALAR_OFFSET
219221
simd<Toffset, N> offsets(cfg.start_ind * sizeof(T),
220222
cfg.stride * sizeof(T));
223+
#else
224+
Toffset offsets = 0;
225+
#endif
221226
simd_mask<N> m = 1;
222227
m[cfg.masked_lane] = 0;
223228
// barrier to achieve better contention:
@@ -318,8 +323,14 @@ template <class T, int N> struct ImplInc {
318323
static T init(int i, const Config &cfg) { return (T)0; }
319324

320325
static T gold(int i, const Config &cfg) {
326+
#ifndef USE_SCALAR_OFFSET
321327
T gold = is_updated(i, N, cfg)
322328
? (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups)
329+
#else
330+
T gold =
331+
i == 0
332+
? (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups * (N - 1))
333+
#endif
323334
: init(i, cfg);
324335
return gold;
325336
}
@@ -331,11 +342,20 @@ template <class T, int N> struct ImplDec {
331342
static constexpr int base = 5;
332343

333344
static T init(int i, const Config &cfg) {
345+
#ifndef USE_SCALAR_OFFSET
334346
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups + base);
347+
#else
348+
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups * (N - 1) +
349+
base);
350+
#endif
335351
}
336352

337353
static T gold(int i, const Config &cfg) {
354+
#ifndef USE_SCALAR_OFFSET
338355
T gold = is_updated(i, N, cfg) ? (T)base : init(i, cfg);
356+
#else
357+
T gold = i == 0 ? (T)base : init(i, cfg);
358+
#endif
339359
return gold;
340360
}
341361
};
@@ -364,7 +384,11 @@ template <class T, int N> struct ImplStore {
364384
static T init(int i, const Config &cfg) { return 0; }
365385

366386
static T gold(int i, const Config &cfg) {
387+
#ifndef USE_SCALAR_OFFSET
367388
T gold = is_updated(i, N, cfg) ? base : init(i, cfg);
389+
#else
390+
T gold = i == 0 ? base : init(i, cfg);
391+
#endif
368392
return gold;
369393
}
370394

@@ -378,9 +402,15 @@ template <class T, int N, class C, C Op> struct ImplAdd {
378402
static T init(int i, const Config &cfg) { return 0; }
379403

380404
static T gold(int i, const Config &cfg) {
405+
#ifndef USE_SCALAR_OFFSET
381406
T gold = is_updated(i, N, cfg) ? (T)(cfg.repeat * cfg.threads_per_group *
382407
cfg.n_groups * (T)(1 + FPDELTA))
383408
: init(i, cfg);
409+
#else
410+
T gold = i == 0 ? (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups *
411+
(N - 1) * (T)(1 + FPDELTA))
412+
: init(i, cfg);
413+
#endif
384414
return gold;
385415
}
386416

@@ -393,13 +423,23 @@ template <class T, int N, class C, C Op> struct ImplSub {
393423
static constexpr T base = (T)(5 + FPDELTA);
394424

395425
static T init(int i, const Config &cfg) {
426+
#ifndef USE_SCALAR_OFFSET
396427
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups *
397428
(T)(1 + FPDELTA) +
398429
base);
430+
#else
431+
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups * (N - 1) *
432+
(T)(1 + FPDELTA) +
433+
base);
434+
#endif
399435
}
400436

401437
static T gold(int i, const Config &cfg) {
438+
#ifndef USE_SCALAR_OFFSET
402439
T gold = is_updated(i, N, cfg) ? base : init(i, cfg);
440+
#else
441+
T gold = i == 0 ? base : init(i, cfg);
442+
#endif
403443
return gold;
404444
}
405445

@@ -416,7 +456,11 @@ template <class T, int N, class C, C Op> struct ImplMin {
416456
}
417457

418458
static T gold(int i, const Config &cfg) {
459+
#ifndef USE_SCALAR_OFFSET
419460
T gold = is_updated(i, N, cfg) ? (T)MIN : init(i, cfg);
461+
#else
462+
T gold = i == 0 ? (T)MIN : init(i, cfg);
463+
#endif
420464
return gold;
421465
}
422466

@@ -431,7 +475,11 @@ template <class T, int N, class C, C Op> struct ImplMax {
431475
static T init(int i, const Config &cfg) { return (T)FPDELTA; }
432476

433477
static T gold(int i, const Config &cfg) {
478+
#ifndef USE_SCALAR_OFFSET
434479
T gold = is_updated(i, N, cfg)
480+
#else
481+
T gold = i == 0
482+
#endif
435483
? (T)(cfg.threads_per_group * cfg.n_groups - 1 + FPDELTA)
436484
: init(i, cfg);
437485
return gold;
@@ -482,7 +530,11 @@ template <class T, int N, class C, C Op> struct ImplCmpxchgBase {
482530
static T init(int i, const Config &cfg) { return base - 1; }
483531

484532
static T gold(int i, const Config &cfg) {
533+
#ifndef USE_SCALAR_OFFSET
485534
T gold = is_updated(i, N, cfg)
535+
#else
536+
T gold = i == 0
537+
#endif
486538
? (T)(cfg.threads_per_group * cfg.n_groups - 1 + base)
487539
: init(i, cfg);
488540
return gold;
@@ -606,12 +658,14 @@ int main(void) {
606658

607659
// Check load/store operations
608660
passed &= test_int_types<8, ImplLoad>(q, cfg);
661+
#ifndef USE_SCALAR_OFFSET
609662
if (q.get_backend() != sycl::backend::ext_intel_esimd_emulator)
610663
passed &= test_int_types<8, ImplStore>(q, cfg);
611664
#ifndef USE_DWORD_ATOMICS
612665
if (q.get_backend() != sycl::backend::ext_intel_esimd_emulator)
613666
passed &= test<float, 8, ImplStore>(q, cfg);
614667
#endif // USE_DWORD_ATOMICS
668+
#endif
615669
// TODO: check double other vector lengths in LSC mode.
616670

617671
std::cout << (passed ? "Passed\n" : "FAILED\n");
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//==---------------- atomic_smoke_scalar_off.cpp - DPC++ ESIMD on-device test
2+
//-----==//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
// This test checks LSC atomic operations.
10+
//===----------------------------------------------------------------------===//
11+
// REQUIRES: gpu-intel-pvc
12+
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
13+
// UNSUPPORTED: esimd_emulator
14+
// RUN: %clangxx -fsycl %s -o %t.out
15+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
16+
//
17+
// scalar offset variant of the test - uses scalar offsets.
18+
19+
#define USE_SCALAR_OFFSET
20+
21+
#include "atomic_smoke.cpp"

0 commit comments

Comments
 (0)