Skip to content

Commit 0da723f

Browse files
authored
[ESIMD] Prepare for enabling of enforcing of stateless memory mode (#10931)
* Support 64-bit offset in scalar_load() and scalar_store() * Fix simd_view_copy_move_assign.cpp test * Fix the test intrins_trans.cpp This patch is only a preliminary step toward the final enabling of force-stateless-memory mode, which should be finalized here: #9452 Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent cb34ea9 commit 0da723f

File tree

5 files changed

+50
-29
lines changed

5 files changed

+50
-29
lines changed

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1343,7 +1343,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
13431343
if (LangOpts.SYCLUnnamedLambda)
13441344
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__");
13451345

1346-
if (LangOpts.SYCLESIMDForceStatelessMem)
1346+
// Stateless memory may be enforced only for SYCL device or host.
1347+
if ((LangOpts.SYCLIsDevice || LangOpts.SYCLIsHost) &&
1348+
LangOpts.SYCLESIMDForceStatelessMem)
13471349
Builder.defineMacro("__ESIMD_FORCE_STATELESS_MEM");
13481350

13491351
// OpenCL definitions.

llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s
1+
; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=false -S < %s | FileCheck --check-prefix=CHECK-NOFORCE --check-prefix=CHECK %s
2+
; RUN: opt -passes=LowerESIMD -lower-esimd-force-stateless-mem=true -S < %s | FileCheck --check-prefix=CHECK-FORCE --check-prefix=CHECK %s
23

34
; This test checks that LowerESIMD pass correctly interprets the
45
; 'kernel_arg_accessor_ptr' metadata. Particularly, that it generates additional
@@ -30,8 +31,14 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, ptr addrspac
3031

3132
; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" }
3233
; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]}
33-
; CHECK: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
34-
; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
35-
; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
36-
; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
34+
35+
; CHECK-NOFORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
36+
; CHECK-FORCE: ![[GENX_KERNELS]] = !{ptr @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_KINDS]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
37+
38+
; CHECK-NOFORCE: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
39+
; CHECK-FORCE: ![[ARG_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
40+
; CHECK-NOFORCE: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
41+
42+
; CHECK-NOFORCE: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
43+
; CHECK-FORCE: ![[ARG_DESCS]] = !{!"", !"svmptr_t", !"svmptr_t", !"", !"svmptr_t"}
3744

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -620,9 +620,15 @@ scatter(AccessorTy acc, simd<Toffset, N> offsets, simd<T, N> vals,
620620
/// @return The loaded value.
621621
///
622622
template <typename T, typename AccessorTy>
623-
__ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) {
623+
__ESIMD_API T scalar_load(AccessorTy acc,
624+
#ifdef __ESIMD_FORCE_STATELESS_MEM
625+
uint64_t offset
626+
#else
627+
uint32_t offset
628+
#endif
629+
) {
624630
const simd<T, 1> Res =
625-
gather<T, 1, AccessorTy>(acc, simd<uint32_t, 1>(offset));
631+
gather<T, 1, AccessorTy>(acc, simd<decltype(offset), 1>(offset));
626632
return Res[0];
627633
}
628634

@@ -634,8 +640,15 @@ __ESIMD_API T scalar_load(AccessorTy acc, uint32_t offset) {
634640
/// @param val The stored value.
635641
///
636642
template <typename T, typename AccessorTy>
637-
__ESIMD_API void scalar_store(AccessorTy acc, uint32_t offset, T val) {
638-
scatter<T, 1, AccessorTy>(acc, simd<uint32_t, 1>(offset), simd<T, 1>(val));
643+
__ESIMD_API void scalar_store(AccessorTy acc,
644+
#ifdef __ESIMD_FORCE_STATELESS_MEM
645+
uint64_t offset,
646+
#else
647+
uint32_t offset,
648+
#endif
649+
T val) {
650+
scatter<T, 1, AccessorTy>(acc, simd<decltype(offset), 1>(offset),
651+
simd<T, 1>(val));
639652
}
640653

641654
/// @anchor usm_gather_rgba

sycl/test-e2e/ESIMD/api/simd_view_copy_move_assign.cpp

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,34 +24,34 @@ using namespace sycl::ext::intel::esimd;
2424
template <unsigned VL, class T, class F>
2525
bool test(queue q, std::string str, F funcUnderTest) {
2626
std::cout << "Testing " << str << ", VL = " << VL << " ...\n";
27-
T A[VL];
28-
T B[VL];
27+
size_t Size = 4 * VL;
28+
T A[Size];
29+
T B[Size];
2930
constexpr unsigned HalfVL = VL > 1 ? (VL / 2) : 1;
3031

3132
// The expected result gets the first half of values from B,
32-
int gold[VL];
33-
for (int i = 0; i < VL; ++i) {
33+
int gold[Size];
34+
for (int i = 0; i < Size; ++i) {
3435
A[i] = -i - 1;
3536
B[i] = i + 1;
36-
gold[i] = ((VL > 1) && (i < HalfVL)) ? B[i] : A[i];
37+
gold[i] = ((VL > 1) && ((i % VL) < HalfVL)) ? B[i] : A[i];
3738
}
3839

3940
try {
40-
buffer<T, 1> bufA(A, range<1>(VL));
41-
buffer<T, 1> bufB(B, range<1>(VL));
42-
range<1> glob_range{1};
41+
buffer<T, 1> BufA(A, range<1>(Size));
42+
buffer<T, 1> BufB(B, range<1>(Size));
4343

4444
q.submit([&](handler &cgh) {
45-
auto PA = bufA.template get_access<access::mode::read_write>(cgh);
46-
auto PB = bufB.template get_access<access::mode::read>(cgh);
47-
cgh.parallel_for(glob_range, [=](id<1> i) SYCL_ESIMD_KERNEL {
45+
auto PA = BufA.template get_access<access::mode::read_write>(cgh);
46+
auto PB = BufB.template get_access<access::mode::read_write>(cgh);
47+
cgh.parallel_for(range<1>{Size / VL}, [=](id<1> i) SYCL_ESIMD_KERNEL {
4848
using namespace sycl::ext::intel::esimd;
4949
unsigned int offset = i * VL * sizeof(T);
5050
simd<T, VL> va;
5151
simd<T, VL> vb;
5252
if constexpr (VL == 1) {
53-
va[0] = scalar_load<T>(PA, 0);
54-
vb[0] = scalar_load<T>(PB, 0);
53+
va[0] = scalar_load<T>(PA, offset);
54+
vb[0] = scalar_load<T>(PB, offset);
5555
} else {
5656
va.copy_from(PA, offset);
5757
vb.copy_from(PB, offset);
@@ -62,7 +62,7 @@ bool test(queue q, std::string str, F funcUnderTest) {
6262
funcUnderTest(va_view, vb_view);
6363

6464
if constexpr (VL == 1) {
65-
scalar_store(PB, 0, (T)va[0]);
65+
scalar_store(PB, offset, (T)va[0]);
6666
} else {
6767
va.copy_to(PA, offset);
6868
}
@@ -74,7 +74,7 @@ bool test(queue q, std::string str, F funcUnderTest) {
7474
}
7575

7676
int err_cnt = 0;
77-
for (unsigned i = 0; i < VL; ++i) {
77+
for (unsigned i = 0; i < Size; ++i) {
7878
if (A[i] != gold[i]) {
7979
err_cnt++;
8080
std::cout << "failed at index " << i << ": " << A[i] << " != " << gold[i]
@@ -157,8 +157,7 @@ template <class T> bool testT(queue &q) {
157157
int main(void) {
158158
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
159159
auto dev = q.get_device();
160-
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
161-
<< "\n";
160+
esimd_test::printTestLabel(q);
162161
bool passed = true;
163162
passed &= testT<char>(q);
164163
passed &= testT<float>(q);

sycl/test/esimd/intrins_trans.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
2-
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
2+
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table
33
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL
44

55
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t
6-
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table
6+
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table
77
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS
88

99
// Checks ESIMD intrinsic translation with opaque pointers.

0 commit comments

Comments
 (0)