Skip to content

Commit c1c9184

Browse files
authored
[ESIMD] Fix the ambiguous call error for a corner case of gather() (#12506)
Some corner cases with integer scalar operands of gather() may cause an ambiguous call error. Examples of such calls: // 0 may be treated as vector `simd_mask` or scalar 'global_offset' auto res1 = gather<T, N>(acc, offsets_simd, 0); // This case is more tricky, but also can confuse C++ FE. auto res2 = gather<T, N>(acc, offsets_simd, 0, mask); Signed-off-by: Klochkov, Vyacheslav N <[email protected]>
1 parent 8d0fa98 commit c1c9184

File tree

2 files changed

+26
-4
lines changed

2 files changed

+26
-4
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2934,15 +2934,21 @@ gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
29342934
/// to 1. This variant is added for convenience and lets the user omit the template
29352935
/// arguments and call the function as
29362936
/// 'gather(acc, byte_offsets, mask, pass_thru);'.
2937+
// Dev note: the mask type was turned into template parameter `MaskT` to
2938+
// avoid the conflicts of this prototype with the old gather() function
2939+
// accepting a 'global_offset' parameter and avoid 'ambiguous call' errors
2940+
// for calls like this: gather(acc, byte_offsets_simd, 0, mask);
29372941
template <typename T, int N, typename AccessorT, typename OffsetT,
2942+
typename MaskT,
29382943
typename PropertyListT =
29392944
ext::oneapi::experimental::detail::empty_properties_t>
29402945
__ESIMD_API std::enable_if_t<
29412946
(detail::is_device_accessor_with_v<AccessorT,
29422947
detail::accessor_mode_cap::can_read> &&
2948+
std::is_same_v<MaskT, simd_mask<N>> &&
29432949
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
29442950
simd<T, N>>
2945-
gather(AccessorT acc, simd<OffsetT, N> byte_offsets, simd_mask<N> mask,
2951+
gather(AccessorT acc, simd<OffsetT, N> byte_offsets, MaskT mask,
29462952
simd<T, N> pass_thru, PropertyListT props = {}) {
29472953
return gather<T, N, 1>(acc, byte_offsets, mask, pass_thru, props);
29482954
}
@@ -2954,15 +2960,19 @@ gather(AccessorT acc, simd<OffsetT, N> byte_offsets, simd_mask<N> mask,
29542960
/// This function is identical to (acc-ga-2) except that vector size is fixed
29552961
/// to 1. This variant is added for convenience and lets the user omit the template
29562962
/// arguments and call the function as 'gather(acc, byte_offsets, mask);'.
2963+
// Dev note: the mask type was turned into template parameter `MaskT` to
2964+
// avoid the conflicts of this prototype with the old gather() function
2965+
// accepting a 'global_offset' parameter and avoid 'ambiguous call' errors
2966+
// for calls like this: gather(acc, byte_offsets_simd, 0);
29572967
template <typename T, int N, typename AccessorT, typename OffsetT,
29582968
typename MaskT,
29592969
typename PropertyListT =
29602970
ext::oneapi::experimental::detail::empty_properties_t>
29612971
__ESIMD_API std::enable_if_t<
29622972
(detail::is_device_accessor_with_v<AccessorT,
29632973
detail::accessor_mode_cap::can_read> &&
2964-
ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2965-
std::is_same_v<MaskT, simd_mask<N>>),
2974+
std::is_same_v<MaskT, simd_mask<N>> &&
2975+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
29662976
simd<T, N>>
29672977
gather(AccessorT acc, simd<OffsetT, N> byte_offsets, MaskT mask,
29682978
PropertyListT props = {}) {

sycl/test/esimd/intrins_trans.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,14 +304,26 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
304304
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
305305
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
306306

307-
// 1-byte element gather
307+
// 1-byte element gather: same code with and without mask
308308
simd<unsigned char, 8> v1 = gather<unsigned char, 8>(acc, offsets, 100);
309309
// CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
310310
// CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]]
311311
// CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]]
312312
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
313313
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef)
314314

315+
// 1-byte element gather using the mask
316+
v1 = gather<unsigned char, 8>(acc, offsets, 100, pred);
317+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 {{[^)]+}}, i32 {{[^)]+}}, <8 x i32> {{[^)]+}}, <8 x i1> {{[^)]+}})
318+
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> {{[^)]+}}, i32 0, <8 x i64> {{[^)]+}}, <32 x i8> undef)
319+
320+
// 1-byte element gather using the mask - the offsets are signed, which may
321+
// expose different issues/conflicts in gather API.
322+
simd<int32_t, 8> ioffsets = 1;
323+
v1 = gather<unsigned char, 8>(acc, ioffsets, 0, pred);
324+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 {{[^)]+}}, i32 {{[^)]+}}, <8 x i32> {{[^)]+}}, <8 x i1> {{[^)]+}})
325+
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> {{[^)]+}}, i32 0, <8 x i64> {{[^)]+}}, <32 x i8> undef)
326+
315327
// 1-byte element scatter
316328
scatter<unsigned char, 8>(acc, offsets, v1, 100, pred);
317329
// CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})

0 commit comments

Comments
 (0)