Skip to content

[SYCL][ESIMD][EMU] Excluding 'PromoT' work-around for ESIMD_EMULATOR #5702

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ __ESIMD_INTRIN
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk);
__SEIEED::vector_type_t<Ty, N * __SEIEED::ElemsPerAddrDecoding(NumBlk)> V = 0;
ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr);
Expand Down Expand Up @@ -132,6 +133,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk);
ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr);
if (sizeof(Ty) == 2)
Expand Down Expand Up @@ -160,6 +162,7 @@ __esimd_svm_block_ld_unaligned(uint64_t addr)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N> V;

for (int I = 0; I < N; I++) {
Expand All @@ -178,6 +181,7 @@ __esimd_svm_block_ld(uint64_t addr)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N> V;

for (int I = 0; I < N; I++) {
Expand All @@ -196,6 +200,7 @@ __ESIMD_INTRIN void __esimd_svm_block_st(uint64_t addr,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
for (int I = 0; I < N; I++) {
Ty *Addr = reinterpret_cast<Ty *>(addr + I * sizeof(Ty));
*Addr = vals[I];
Expand All @@ -211,6 +216,7 @@ __esimd_oword_ld_unaligned(SurfIndAliasTy surf_ind, uint32_t offset)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N> retv;
sycl::detail::ESIMDDeviceInterface *I =
sycl::detail::getESIMDDeviceInterface();
Expand Down Expand Up @@ -256,6 +262,7 @@ __ESIMD_INTRIN void __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t offset,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
offset <<= 4;

sycl::detail::ESIMDDeviceInterface *I =
Expand Down Expand Up @@ -304,6 +311,7 @@ __esimd_svm_gather4_scaled(__SEIEED::vector_type_t<uint64_t, N> addrs,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> V = 0;
unsigned int Next = 0;
uint64_t Offset = 0;
Expand Down Expand Up @@ -334,6 +342,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> V;
unsigned int Next = 0;
uint64_t Offset = 0;
Expand Down Expand Up @@ -382,6 +391,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
static_assert(N == 1 || N == 8 || N == 16 || N == 32);
static_assert(TySizeLog2 <= 2 && Scale == 0);
static_assert(std::is_integral<Ty>::value || TySizeLog2 == 2);
Expand Down Expand Up @@ -424,6 +434,7 @@ __esimd_scatter_scaled(__SEIEED::simd_mask_storage_t<N> pred,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
static_assert(N == 1 || N == 8 || N == 16 || N == 32);
static_assert(TySizeLog2 <= 2);
static_assert(std::is_integral<Ty>::value || TySizeLog2 == 2);
Expand Down Expand Up @@ -476,6 +487,7 @@ __esimd_svm_atomic0(__SEIEED::vector_type_t<uint64_t, N> addrs,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -489,6 +501,7 @@ __esimd_svm_atomic1(__SEIEED::vector_type_t<uint64_t, N> addrs,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N> retv;

for (int i = 0; i < N; i++) {
Expand Down Expand Up @@ -519,6 +532,7 @@ __esimd_svm_atomic2(__SEIEED::vector_type_t<uint64_t, N> addrs,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -528,6 +542,7 @@ __ESIMD_INTRIN void __esimd_slm_init(size_t size)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
sycl::detail::getESIMDDeviceInterface()->cm_slm_init_ptr(size);
}
#endif // ifndef __SYCL_DEVICE_ONLY__
Expand All @@ -538,6 +553,7 @@ __ESIMD_INTRIN void __esimd_barrier()
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
sycl::detail::getESIMDDeviceInterface()->cm_barrier_ptr();
}
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -548,6 +564,7 @@ __ESIMD_INTRIN void __esimd_sbarrier(__SEIEE::split_barrier_action flag)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
sycl::detail::getESIMDDeviceInterface()->cm_sbarrier_ptr((uint32_t)flag);
}
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -558,6 +575,7 @@ __ESIMD_INTRIN void __esimd_fence(uint8_t cntl)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
sycl::detail::getESIMDDeviceInterface()->cm_fence_ptr();
}
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -573,6 +591,7 @@ __esimd_gather_scaled(__SEIEED::simd_mask_storage_t<N> pred,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N> retv = 0;
sycl::detail::ESIMDDeviceInterface *I =
sycl::detail::getESIMDDeviceInterface();
Expand Down Expand Up @@ -644,6 +663,7 @@ __esimd_gather_masked_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
static_assert(Scale == 0);

__SEIEED::vector_type_t<Ty, N> retv = 0;
Expand Down Expand Up @@ -693,6 +713,7 @@ __esimd_oword_ld(SurfIndAliasTy surf_ind, uint32_t addr)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
addr <<= 4;

__SEIEED::vector_type_t<Ty, N> retv;
Expand Down Expand Up @@ -743,6 +764,7 @@ __esimd_gather4_scaled(__SEIEED::simd_mask_storage_t<N> pred,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> retv = 0;
sycl::detail::ESIMDDeviceInterface *I =
sycl::detail::getESIMDDeviceInterface();
Expand Down Expand Up @@ -788,6 +810,7 @@ __ESIMD_INTRIN void __esimd_scatter4_scaled(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
sycl::detail::ESIMDDeviceInterface *I =
sycl::detail::getESIMDDeviceInterface();
char *WriteBase;
Expand Down Expand Up @@ -829,6 +852,7 @@ __esimd_dword_atomic0(__SEIEED::simd_mask_storage_t<N> pred,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, N> retv;

if (surf_ind == __SEIEE::detail::SLM_BTI) {
Expand Down Expand Up @@ -865,6 +889,7 @@ __esimd_dword_atomic1(__SEIEED::simd_mask_storage_t<N> pred,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand All @@ -878,6 +903,7 @@ __ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N> __esimd_dword_atomic2(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -905,6 +931,7 @@ __esimd_media_ld(TACC handle, unsigned x, unsigned y)
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
__SEIEED::vector_type_t<Ty, M * N> vals;
char *readBase;
uint32_t bpp;
Expand Down Expand Up @@ -1033,6 +1060,7 @@ __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
sycl::detail::ESIMDDeviceInterface *I =
sycl::detail::getESIMDDeviceInterface();

Expand Down Expand Up @@ -1166,6 +1194,7 @@ __ESIMD_INTRIN __SEIEED::vector_type_t<Ty1, N1> __esimd_raw_sends2(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -1207,6 +1236,7 @@ __esimd_raw_send2(uint8_t modifier, uint8_t execSize,
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -1245,6 +1275,7 @@ __ESIMD_INTRIN void __esimd_raw_sends2_noresult(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -1277,6 +1308,7 @@ __ESIMD_INTRIN void __esimd_raw_send2_noresult(
;
#else
{
__esimd_dbg_print(ESIMD_EMU_MEMORY_INTRIN);
throw cl::sycl::feature_not_supported();
}
#endif // __SYCL_DEVICE_ONLY__
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
#include <sycl/ext/intel/experimental/esimd/detail/region.hpp>

#if defined(__ESIMD_DBG_HOST) && !defined(__SYCL_DEVICE_ONLY__)
#define __esimd_dbg_print(a) std::cout << ">>> " << #a << "\n"
#define __esimd_dbg_print(a) \
std::cout << ">>> " << #a << " (" << __FUNCTION__ << ":" << __LINE__ << ")" \
<< "\n"
#else
#define __esimd_dbg_print(a)
#endif // defined(__ESIMD_DBG_HOST) && !defined(__SYCL_DEVICE_ONLY__)
Expand Down
12 changes: 8 additions & 4 deletions sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ ESIMD_INLINE
// TODO (performance) use hardware-supported scale once BE supports it
constexpr int16_t scale = 0;
const auto si = __ESIMD_GET_SURF_HANDLE(acc);

#ifdef __SYCL_DEVICE_ONLY__
if constexpr (sizeof(T) < 4) {
using Tint = std::conditional_t<std::is_integral_v<T>, T,
detail::uint_type_t<sizeof(T)>>;
Expand All @@ -358,7 +358,9 @@ ESIMD_INLINE
const simd<PromoT, N> promo_vals = convert<PromoT>(std::move(vals_int));
__esimd_scatter_scaled<PromoT, N, decltype(si), TypeSizeLog2, scale>(
mask.data(), si, glob_offset, offsets.data(), promo_vals.data());
} else {
} else
#endif
{
__esimd_scatter_scaled<T, N, decltype(si), TypeSizeLog2, scale>(
mask.data(), si, glob_offset, offsets.data(), vals.data());
}
Expand All @@ -376,7 +378,7 @@ gather_impl(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset,
// TODO (performance) use hardware-supported scale once BE supports it
constexpr uint32_t scale = 0;
const auto si = get_surface_index(acc);

#ifdef __SYCL_DEVICE_ONLY__
if constexpr (sizeof(T) < 4) {
using Tint = std::conditional_t<std::is_integral_v<T>, T,
detail::uint_type_t<sizeof(T)>>;
Expand All @@ -397,7 +399,9 @@ gather_impl(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset,
} else {
return Res;
}
} else {
} else
#endif
{
return __esimd_gather_masked_scaled2<T, N, decltype(si), TypeSizeLog2,
scale>(si, glob_offset, offsets.data(),
mask.data());
Expand Down