Skip to content

[ESIMD][NFC] Remove few unused intrinsics, follow-up NFC for #11545 #11594

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,12 +358,6 @@ class ESIMDIntrinDescTable {
{"svm_scatter4_scaled",
{"svm.scatter4.scaled", {ai1(2), t(2), c16(0), c64(0), a(0), a(1)}}},

// intrinsics to query thread's coordinates:
{"group_id_x", {"group.id.x", {}}},
{"group_id_y", {"group.id.y", {}}},
{"group_id_z", {"group.id.z", {}}},
{"local_id", {"local.id", {}}},
{"local_size", {"local.size", {}}},
{"svm_atomic0", {"svm.atomic", {ai1(1), a(0), u(-1)}, bo(0)}},
{"svm_atomic1", {"svm.atomic", {ai1(2), a(0), a(1), u(-1)}, bo(0)}},
{"svm_atomic2",
Expand Down Expand Up @@ -405,25 +399,6 @@ class ESIMDIntrinDescTable {
// arg2: data to write (overloaded)
{"oword_st", {"oword.st", {aSI(0), a(1), a(2)}}},

// surface index-based gather/scatter:
// arg0: i32 log2 num blocks, CONSTANT (0/1/2 for num blocks 1/2/4)
// arg1: i16 scale, CONSTANT
// arg2: i32 surface index
// arg3: i32 global offset in bytes
// arg4: vXi32 element offset in bytes (overloaded)
{"gather_scaled2",
{"gather.scaled2", {t(3), t(4), aSI(0), a(1), a(2)}}},

// arg0: vXi1 predicate (overloaded)
// arg1: i32 log2 num blocks, CONSTANT (0/1/2 for num blocks 1/2/4)
// arg2: i16 scale, CONSTANT
// arg3: i32 surface index
// arg4: i32 global offset in bytes
// arg5: vXi32 element offset in bytes (overloaded)
// arg6: old value of the data read
{"gather_scaled",
{"gather.scaled", {ai1(0), t(3), t(4), aSI(1), a(2), a(3), u(-1)}}},

// arg0: i32 log2 num blocks, CONSTANT (0/1/2 for num blocks 1/2/4)
// arg1: i16 scale, CONSTANT
// arg2: i32 surface index
Expand Down
56 changes: 1 addition & 55 deletions sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ __esimd_svm_block_st(__ESIMD_DNS::vector_type_t<Ty, N> *addr,
/// @tparam VS is the number of elements to load per address.
/// @tparam Transposed indicates if the data is transposed during the transfer.
/// @tparam N is the SIMD size of operation (the number of addresses to access)
/// @tparam SurfIndAliasTy is the \ref sycl::accessor type.
/// @tparam SurfIndAliasT is the \ref sycl::accessor type.
/// @param pred is predicates.
/// @param offsets is the zero-based offsets in bytes.
/// @param surf_ind is the surface index.
Expand Down Expand Up @@ -268,43 +268,6 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
}
#endif // __SYCL_DEVICE_ONLY__

// Low-level surface-based gather. Collects elements located at given offsets in
// a surface and returns them as a single \ref simd object. Element can be
// 1, 2 or 4-byte value, but is always returned as a 4-byte value within the
// resulting simd object, with upper 2 or 3 bytes undefined.
// Template (compile-time constant) parameters:
// @tparam Ty - element type; can only be a 4-byte integer or \c float,
// @tparam N - the number of elements
// @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
// accessor used to denote the surface
// @tparam TySizeLog2 - Log2 of the number of bytes read per element:
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
// @tparam Scale - offset scaling factor; must be zero currently
// @tparam L1H - L1 cache hint
// @tparam L2H - L2 cache hint
//
// Formal parameters:
// @param surf_ind - the surface index, taken from the SYCL memory object
// @param global_offset - offset added to each individual element's offset to
// compute actual memory access offset for that element
// @param elem_offsets - per-element offsets
//
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
int16_t Scale = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
__ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets)
#ifdef __SYCL_DEVICE_ONLY__
;
#else
{
static_assert(N == 1 || N == 8 || N == 16 || N == 32);
static_assert(TySizeLog2 <= 2 && Scale == 0);
static_assert(std::is_integral_v<Ty> || TySizeLog2 == 2);
__ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__

// Low-level surface-based scatter. Writes elements of a \ref simd object into a
// surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is
// always represented as a 4-byte value within the input simd object,
Expand All @@ -317,8 +280,6 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
// @tparam TySizeLog2 - Log2 of the number of bytes written per element:
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
// @tparam Scale - offset scale; only 0 is supported for now
// @tparam L1H - L1 cache hint
// @tparam L2H - L2 cache hint
//
// Formal parameters:
// @param pred - per-element predicates; elements with zero corresponding
Expand Down Expand Up @@ -416,21 +377,6 @@ __ESIMD_INTRIN void __esimd_fence(uint8_t cntl)
}
#endif // __SYCL_DEVICE_ONLY__

// Scaled gather from a surface.
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
int16_t Scale = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_gather_scaled(__ESIMD_DNS::simd_mask_storage_t<N> pred,
SurfIndAliasTy surf_ind, uint32_t global_offset,
__ESIMD_DNS::vector_type_t<uint32_t, N> addrs)
#ifdef __SYCL_DEVICE_ONLY__
;
#else
{
__ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__

// Predicated (masked) scaled gather from a surface.
//
// Template (compile-time constant) parameters:
Expand Down
47 changes: 9 additions & 38 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ scatter(Tx *p, Toffset offset, simd<Tx, N> vals, simd_mask<N> mask = 1) {

namespace detail {
// Accessors may get either 32-bit offset or 64-bit depending on
// the -fsycl-esimd-force-stateles-mem mode settigs.
// the -fsycl-esimd-force-stateles-mem mode setting.
#ifdef __ESIMD_FORCE_STATELESS_MEM
using DeviceAccessorOffsetT = uint64_t;
#else
Expand Down Expand Up @@ -394,7 +394,6 @@ block_load_impl(const T *p, simd_mask<1> pred, simd<T, NElts> pass_thru,
///
/// @tparam T is element type.
/// @tparam NElts is the number of elements to load per address.
/// @tparam DS is the data size.
/// @tparam L1H is L1 cache hint.
/// @tparam L2H is L2 cache hint.
/// @tparam AccessorT is the \ref sycl::accessor type.
Expand Down Expand Up @@ -1455,13 +1454,8 @@ __ESIMD_API std::enable_if_t<
sycl::detail::acc_properties::is_accessor_v<AccessorTy> &&
!sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
simd<T, N>>
gather(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
simd<uint64_t, N> offsets, uint64_t glob_offset = 0,
#else
simd<uint32_t, N> offsets, uint32_t glob_offset = 0,
#endif
simd_mask<N> mask = 1) {
gather(AccessorTy acc, simd<detail::DeviceAccessorOffsetT, N> offsets,
detail::DeviceAccessorOffsetT glob_offset = 0, simd_mask<N> mask = 1) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
return gather<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset),
offsets, mask);
Expand Down Expand Up @@ -1508,18 +1502,8 @@ __ESIMD_API std::enable_if_t<
(sizeof(T) <= 4) && (N == 1 || N == 8 || N == 16 || N == 32) &&
sycl::detail::acc_properties::is_accessor_v<AccessorTy> &&
!sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>>
scatter(AccessorTy acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
simd<uint64_t, N> offsets,
#else
simd<uint32_t, N> offsets,
#endif
simd<T, N> vals,
#ifdef __ESIMD_FORCE_STATELESS_MEM
uint64_t glob_offset = 0,
#else
uint32_t glob_offset = 0,
#endif
scatter(AccessorTy acc, simd<detail::DeviceAccessorOffsetT, N> offsets,
simd<T, N> vals, detail::DeviceAccessorOffsetT glob_offset = 0,
simd_mask<N> mask = 1) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
scatter<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset), offsets,
Expand Down Expand Up @@ -1801,12 +1785,8 @@ __ESIMD_API std::enable_if_t<
!std::is_pointer_v<AccessorT> &&
!sycl::detail::acc_properties::is_local_accessor_v<AccessorT>),
simd<T, N * get_num_channels_enabled(RGBAMask)>>
gather_rgba(AccessorT acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
simd<uint64_t, N> offsets, uint64_t global_offset = 0,
#else
simd<uint32_t, N> offsets, uint32_t global_offset = 0,
#endif
gather_rgba(AccessorT acc, simd<detail::DeviceAccessorOffsetT, N> offsets,
detail::DeviceAccessorOffsetT global_offset = 0,
simd_mask<N> mask = 1) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
return gather_rgba<RGBAMask>(
Expand Down Expand Up @@ -1858,18 +1838,9 @@ __ESIMD_API std::enable_if_t<
(N == 8 || N == 16 || N == 32) && sizeof(T) == 4 &&
!std::is_pointer_v<AccessorT> &&
!sycl::detail::acc_properties::is_local_accessor_v<AccessorT>>
scatter_rgba(AccessorT acc,
#ifdef __ESIMD_FORCE_STATELESS_MEM
simd<uint64_t, N> offsets,
#else
simd<uint32_t, N> offsets,
#endif
scatter_rgba(AccessorT acc, simd<detail::DeviceAccessorOffsetT, N> offsets,
simd<T, N * get_num_channels_enabled(RGBAMask)> vals,
#ifdef __ESIMD_FORCE_STATELESS_MEM
uint64_t global_offset = 0,
#else
uint32_t global_offset = 0,
#endif
detail::DeviceAccessorOffsetT global_offset = 0,
simd_mask<N> mask = 1) {
detail::validate_rgba_write_channel_mask<RGBAMask>();
#ifdef __ESIMD_FORCE_STATELESS_MEM
Expand Down
Loading