Skip to content

Commit b95b2d8

Browse files
authored
[ESIMD] Prepare to enforcing stateless accesses by default (#9959)
- Enable media API that works with image-accessors. Those accessors do not depend on stateful/stateless mode. - Extend some of LIT tests - enforce stateful and stateless modes in them explicitly to avoid changes caused by new default value of the switch -fsycl-esimd-force-stateless-mem in future. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 92855be commit b95b2d8

File tree

7 files changed

+70
-54
lines changed

7 files changed

+70
-54
lines changed

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,10 +1164,6 @@ __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
11641164
}
11651165
#endif // __SYCL_DEVICE_ONLY__
11661166

1167-
// getter methods returning surface index are not available when stateless
1168-
// memory accesses are enforced.
1169-
#ifndef __ESIMD_FORCE_STATELESS_MEM
1170-
11711167
// \brief Converts given value to a surface index.
11721168
// The input must always be a result of
11731169
// detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc)
@@ -1201,6 +1197,4 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
12011197
}
12021198
#endif // __SYCL_DEVICE_ONLY__
12031199

1204-
#endif // !__ESIMD_FORCE_STATELESS_MEM
1205-
12061200
/// @endcond ESIMD_DETAIL

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@ __ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc) {
6464
sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>) {
6565
return detail::SLM_BTI;
6666
} else {
67+
#ifdef __ESIMD_FORCE_STATELESS_MEM
68+
static_assert(sycl::detail::acc_properties::is_image_accessor_v<AccessorTy>,
69+
"The function get_surface_index() is available only for "
70+
"image- and local-accessors in stateless-only memory mode. "
71+
"Consider using "
72+
"-fno-sycl-esimd-force-stateless-mem compilation switch.");
73+
#endif // __ESIMD_FORCE_STATELESS_MEM
6774
return __esimd_get_surface_index(
6875
detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc));
6976
}
@@ -1910,7 +1917,6 @@ __ESIMD_API simd<Tx, N> slm_atomic_update(simd<uint32_t, N> offsets,
19101917

19111918
/// @} sycl_esimd_memory_slm
19121919

1913-
#ifndef __ESIMD_FORCE_STATELESS_MEM
19141920
/// @addtogroup sycl_esimd_memory
19151921
/// @{
19161922

@@ -1996,7 +2002,6 @@ __ESIMD_API void media_block_store(AccessorTy acc, unsigned x, unsigned y,
19962002
vals.data());
19972003
}
19982004
}
1999-
#endif // !__ESIMD_FORCE_STATELESS_MEM
20002005

20012006
/// @} sycl_esimd_memory
20022007

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,6 @@ __ESIMD_API void split_barrier(split_barrier_action flag) {
3636

3737
/// @} sycl_esimd_memory
3838

39-
// sycl_esimd_raw_send intrinsics are not available when stateless memory
40-
// accesses are enforced.
41-
#ifndef __ESIMD_FORCE_STATELESS_MEM
42-
4339
/// @addtogroup sycl_esimd_raw_send
4440
/// @{
4541

@@ -207,8 +203,6 @@ raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
207203

208204
/// @} sycl_esimd_raw_send
209205

210-
#endif // !__ESIMD_FORCE_STATELESS_MEM
211-
212206
/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs.
213207
/// @ingroup sycl_esimd_memory
214208

@@ -734,7 +728,8 @@ lsc_gather(AccessorTy acc,
734728
#endif
735729
__ESIMD_NS::simd_mask<N> pred = 1) {
736730
#ifdef __ESIMD_FORCE_STATELESS_MEM
737-
return lsc_gather<T, NElts, DS, L1H, L3H>(acc.get_pointer(), offsets, pred);
731+
return lsc_gather<T, NElts, DS, L1H, L3H>(
732+
reinterpret_cast<T *>(acc.get_pointer()), offsets, pred);
738733
#else
739734
detail::check_lsc_vector_size<NElts>();
740735
detail::check_lsc_data_size<T, DS>();
@@ -807,8 +802,9 @@ lsc_gather(AccessorTy acc,
807802
__ESIMD_NS::simd_mask<N> pred,
808803
__ESIMD_NS::simd<T, N * NElts> old_values) {
809804
#ifdef __ESIMD_FORCE_STATELESS_MEM
810-
return lsc_gather<T, NElts, DS, L1H, L3H>(acc.get_pointer(), offsets, pred,
811-
old_values);
805+
return lsc_gather<T, NElts, DS, L1H, L3H>(
806+
reinterpret_cast<T *>(acc.get_pointer()), offsets, pred, old_values);
807+
812808
#else
813809
detail::check_lsc_vector_size<NElts>();
814810
detail::check_lsc_data_size<T, DS>();

sycl/test-e2e/ESIMD/vadd_raw_send.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
// UNSUPPORTED: gpu-intel-dg1,gpu-intel-dg2,gpu-intel-pvc
1010
// TODO: esimd_emulator fails due to unimplemented 'raw_send' intrinsic
1111
// XFAIL: esimd_emulator
12-
// RUN: %{build} -o %t1.out
12+
// RUN: %{build} -fno-sycl-esimd-force-stateless-mem -o %t1.out
1313
// RUN: %{run} %t1.out
14-
// RUN: %{build} -DNEW_API -o %t2.out
14+
// RUN: %{build} -DNEW_API -fno-sycl-esimd-force-stateless-mem -o %t2.out
1515
// RUN: %{run} %t2.out
1616

1717
// The test checks raw send functionality with block read/write implementation

sycl/test/esimd/intrins_trans.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -Xclang -emit-llvm -Xclang -no-opaque-pointers %s -o %t
1+
// RUN: %clangxx -O0 -fsycl -fno-sycl-esimd-force-stateless-mem -fsycl-device-only -Xclang -emit-llvm -Xclang -no-opaque-pointers %s -o %t
22
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
33
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
44

sycl/test/esimd/intrins_trans_opaque.cpp

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t
1+
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t
22
// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table
3-
// RUN: FileCheck %s -input-file=%t_esimd_0.ll
3+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL
4+
5+
// RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm -Xclang -opaque-pointers %s -o %t
6+
// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table
7+
// RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS
48

59
// Checks ESIMD intrinsic translation with opaque pointers.
610
// NOTE: must be run in -O0, as optimizer optimizes away some of the code
@@ -266,31 +270,35 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
266270

267271
// 4-byte element gather
268272
simd<int, 8> v = gather<int, 8>(acc, offsets, 100);
269-
// CHECK: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
270-
// CHECK: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]]
271-
// CHECK: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]]
272-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
273+
// CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
274+
// CHECK-STATEFUL: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]]
275+
// CHECK-STATEFUL: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]]
276+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
277+
// CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> undef)
273278

274279
// 4-byte element scatter
275280
scatter<int, 8>(acc, offsets, v, 100, pred);
276-
// CHECK: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
277-
// CHECK: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]]
278-
// CHECK: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]]
279-
// CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
281+
// CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
282+
// CHECK-STATEFUL: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]]
283+
// CHECK-STATEFUL: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]]
284+
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
285+
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
280286

281287
// 1-byte element gather
282288
simd<unsigned char, 8> v1 = gather<unsigned char, 8>(acc, offsets, 100);
283-
// CHECK: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
284-
// CHECK: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]]
285-
// CHECK: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]]
286-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
289+
// CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
290+
// CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]]
291+
// CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]]
292+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
293+
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef)
287294

288295
// 1-byte element scatter
289296
scatter<unsigned char, 8>(acc, offsets, v1, 100, pred);
290-
// CHECK: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
291-
// CHECK: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]]
292-
// CHECK: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]]
293-
// CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
297+
// CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
298+
// CHECK-STATEFUL: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]]
299+
// CHECK-STATEFUL: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]]
300+
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
301+
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> %{{[0-9a-zA-Z_.]+}})
294302
}
295303
__esimd_fence(fence_mask::global_coherent_fence);
296304
// CHECK: call void @llvm.genx.fence(i8 1)

0 commit comments

Comments
 (0)