Skip to content

Commit 714cfc3

Browse files
authored
[ESIMD] Add ext::intel::experimental::esimd::wait() (#8434)
The function is a very specific tool to set up a scoreboard dependency and add a use for the given value. It also prevents a dead/unused value from being optimized away. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent d95d235 commit 714cfc3

File tree

3 files changed

+31
-0
lines changed

3 files changed

+31
-0
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@ class ESIMDIntrinDescTable {
505505
{"raw_send2_noresult",
506506
{"raw.send2.noresult",
507507
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}},
508+
{"wait", {"dummy.mov", {a(0)}}},
508509
{"dpas2",
509510
{"dpas2", {a(0), a(1), a(2), t(0), t(1), t(2), t(3), t(11), t(12)}}},
510511
{"dpas_nosrc0", {"dpas.nosrc0", {a(0), a(1), t(0)}}},

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ __ESIMD_INTRIN void __esimd_sbarrier(__ESIMD_ENS::split_barrier_action flag)
2626
}
2727
#endif // __SYCL_DEVICE_ONLY__
2828

29+
#ifdef __SYCL_DEVICE_ONLY__
30+
// Create an explicit data and GPU scoreboard dependency.
// The declaration is visible only when __SYCL_DEVICE_ONLY__ is defined; no
// body is provided here.
// @param value the 16-bit word the dependency is created on.
// NOTE(review): the LowerESIMD table entry {"wait", {"dummy.mov", {a(0)}}}
// appears to lower this intrinsic to "dummy.mov" - confirm against
// LowerESIMD.cpp.
31+
__ESIMD_INTRIN void __esimd_wait(uint16_t value);
32+
#endif // __SYCL_DEVICE_ONLY__
33+
2934
// \brief Raw sends load.
3035
//
3136
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,31 @@ __ESIMD_API void named_barrier_signal(uint8_t barrier_id,
253253
0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
254254
}
255255

256+
/// Create explicit scoreboard dependency to avoid device code motion
257+
/// across this call and preserve the \p value computation even
258+
/// if it is unused.
///
/// Only the first 16-bit word of \p value is forwarded to the underlying
/// __esimd_wait intrinsic. When __SYCL_DEVICE_ONLY__ is not defined the
/// function body is empty.
///
/// This overload is enabled only when \p value occupies at least 2 bytes
/// (sizeof(T) * N >= 2), so that a 16-bit word can be read from it.
///
/// @tparam T type used together with \p N to compute the size of \p value.
/// @tparam N multiplier of sizeof(T) in the size check (the simd length
/// parameter).
/// @param value the vector whose first 16-bit word the dependency is
/// created on.
259+
template <typename T, int N>
260+
__ESIMD_API std::enable_if_t<(sizeof(T) * N >= 2)>
261+
wait(__ESIMD_NS::simd<T, N> value) {
262+
#ifdef __SYCL_DEVICE_ONLY__
263+
// Reinterpret the vector as 16-bit words and take the first one.
uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
264+
__esimd_wait(Word);
265+
#endif // __SYCL_DEVICE_ONLY__
266+
}
267+
268+
/// Create explicit scoreboard dependency to avoid device code motion
269+
/// across this call and preserve the \p value computation even
270+
/// if it is unused.
///
/// Only the first 16-bit word of the viewed data is forwarded to the
/// underlying __esimd_wait intrinsic. When __SYCL_DEVICE_ONLY__ is not
/// defined the function body is empty.
///
/// This overload is enabled only when the view covers at least 2 bytes
/// (RegionT::length * sizeof(typename RegionT::element_type) >= 2).
///
/// @tparam T first template argument of the simd_view type of \p value.
/// @tparam RegionT region type; its \c length and \c element_type members
/// are used for the size check.
/// @param value the view whose first 16-bit word the dependency is
/// created on.
271+
template <typename T, typename RegionT>
272+
__ESIMD_API std::enable_if_t<
273+
(RegionT::length * sizeof(typename RegionT::element_type) >= 2)>
274+
wait(__ESIMD_NS::simd_view<T, RegionT> value) {
275+
#ifdef __SYCL_DEVICE_ONLY__
276+
// Reinterpret the view as 16-bit words and take the first one.
uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
277+
__esimd_wait(Word);
278+
#endif // __SYCL_DEVICE_ONLY__
279+
}
280+
256281
/// @} sycl_esimd_memory_nbarrier
257282

258283
/// @defgroup sycl_esimd_memory_lsc LSC memory access APIs.

0 commit comments

Comments
 (0)