Skip to content

Commit a72409e

Browse files
authored
[SYCL][ESIMD] Add supported versions of raw_send APIs (#11333)
Some design decisions I made here: 1) Only add the versions with must-be-constant arguments as template parameters. The original versions had them as runtime parameters and it worked sometimes, but if the compiler can't optimize it, it will fail. It seems sane to me to only add the versions which are guaranteed to be correct. 2) Replace `eot`/`sendc` arguments with enums; we discussed this one and it is hopefully not controversial. 3) Move the intrinsics to supported and call the supported intrinsics in the experimental code just to prevent code duplication. Also, the `histogram_raw_send` test doesn't even compile in HEAD; we didn't catch it because it requires gen9 and we don't test that anymore. It was doing something insane with `execSize` and once we made mask len == exec size it broke. They aren't even using the mask so just remove it to fix the test. Manually verified both modified tests pass. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 43336a1 commit a72409e

File tree

8 files changed

+383
-186
lines changed

8 files changed

+383
-186
lines changed

sycl/include/sycl/ext/intel/esimd.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
/// @defgroup sycl_esimd_raw_send Raw send APIs.
6666
/// Implements the \c send instruction to send messages to various components
6767
/// of the Intel(R) processor graphics, as defined in the documentation at
68-
/// https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-icllp-vol02a-commandreference-instructions_2.pdf
68+
/// https://www.intel.com/content/www/us/en/docs/graphics-for-linux/developer-reference/1-0/hardware-specs.html
6969

7070
/// @defgroup sycl_esimd_misc Miscellaneous ESIMD convenience functions.
7171

sycl/include/sycl/ext/intel/esimd/common.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,18 @@ enum class rgba_channel : uint8_t { R, G, B, A };
6363
/// identified by its "binding table index" - surface index.
6464
using SurfaceIndex = unsigned int;
6565

66+
/// Specify if end of thread should be set.
67+
// Compile-time selector for the end-of-thread (EOT) flag of a raw send.
enum class raw_send_eot : uint8_t {
68+
// End of thread is not set.
not_eot = 0,
69+
// End of thread is set.
eot = 1,
70+
};
71+
72+
/// Specify if sendc should be used.
73+
// Compile-time selector for whether a raw send uses sendc.
enum class raw_send_sendc : uint8_t {
74+
// sendc is not used.
not_sendc = 0,
75+
// sendc is used.
sendc = 1,
76+
};
77+
6678
namespace detail {
6779

6880
// Type used in internal functions to designate SLM access by

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,4 +581,168 @@ ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
581581
}
582582
#endif // __SYCL_DEVICE_ONLY__
583583

584+
// \brief Raw sends.
585+
//
586+
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
587+
//
588+
// @param execSize the execution size, which must be a compile time constant.
589+
//
590+
// @param pred the predicate to specify enabled channels.
591+
//
592+
// @param numSrc0 the number of GRFs for source-0, which must be a compile time
593+
// constant.
594+
//
595+
// @param numSrc1 the number of GRFs for source-1, which must be a compile time
596+
// constant.
597+
//
598+
// @param numDst the number of GRFs for destination, which must be a compile
599+
// time constant.
600+
//
601+
// @param sfid the shared function ID, which must be a compile time constant.
602+
//
603+
// @param exDesc the extended message descriptor.
604+
//
605+
// @param msgDesc the message descriptor.
606+
//
607+
// @param msgSrc0 the first source operand of send message.
608+
//
609+
// @param msgSrc1 the second source operand of send message.
610+
//
611+
// @param msgDst the destination operand of send message.
612+
//
613+
// Returns a simd vector of type Ty1 and size N1.
614+
//
615+
// Template parameters: Ty1/N1 are the element type and length of msgDst and of
// the returned vector, Ty2/N2 those of msgSrc0, Ty3/N3 those of msgSrc1, and N
// is the length of the predicate storage `pred` (16 unless specified).
template <typename Ty1, int N1, typename Ty2, int N2, typename Ty3, int N3,
616+
int N = 16>
617+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1>
618+
__esimd_raw_sends2(uint8_t modifier, uint8_t execSize,
619+
__ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0,
620+
uint8_t numSrc1, uint8_t numDst, uint8_t sfid,
621+
uint32_t exDesc, uint32_t msgDesc,
622+
__ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
623+
__ESIMD_DNS::vector_type_t<Ty3, N3> msgSrc1,
624+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgDst)
625+
// When __SYCL_DEVICE_ONLY__ is defined this is a declaration only; otherwise
// the function gets a body that expands __ESIMD_UNSUPPORTED_ON_HOST.
#ifdef __SYCL_DEVICE_ONLY__
626+
;
627+
#else
628+
{
629+
__ESIMD_UNSUPPORTED_ON_HOST;
630+
}
631+
#endif // __SYCL_DEVICE_ONLY__
632+
633+
// \brief Raw send.
634+
//
635+
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
636+
//
637+
// @param execSize the execution size, which must be a compile time constant.
638+
//
639+
// @param pred the predicate to specify enabled channels.
640+
//
641+
// @param numSrc0 the number of GRFs for source-0, which must be a compile time
642+
// constant.
643+
//
644+
// @param numDst the number of GRFs for destination, which must be a compile
645+
// time constant.
646+
//
647+
// @param sfid the shared function ID, which must be a compile time constant.
648+
//
649+
// @param exDesc the extended message descriptor.
650+
//
651+
// @param msgDesc the message descriptor.
652+
//
653+
// @param msgSrc0 the first source operand of send message.
654+
//
655+
// @param msgDst the destination operand of send message.
656+
//
657+
// Returns a simd vector of type Ty1 and size N1.
658+
//
659+
// Template parameters: Ty1/N1 are the element type and length of msgDst and of
// the returned vector, Ty2/N2 those of msgSrc0, and N is the length of the
// predicate storage `pred` (16 unless specified).
template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
660+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1>
661+
__esimd_raw_send2(uint8_t modifier, uint8_t execSize,
662+
__ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0,
663+
uint8_t numDst, uint8_t sfid, uint32_t exDesc,
664+
uint32_t msgDesc, __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
665+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgDst)
666+
// When __SYCL_DEVICE_ONLY__ is defined this is a declaration only; otherwise
// the function gets a body that expands __ESIMD_UNSUPPORTED_ON_HOST.
#ifdef __SYCL_DEVICE_ONLY__
667+
;
668+
#else
669+
{
670+
__ESIMD_UNSUPPORTED_ON_HOST;
671+
}
672+
#endif // __SYCL_DEVICE_ONLY__
673+
674+
// \brief Raw sends.
675+
//
676+
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
677+
//
678+
// @param execSize the execution size, which must be a compile time constant.
679+
//
680+
// @param pred the predicate to specify enabled channels.
681+
//
682+
// @param numSrc0 the number of GRFs for source-0, which must be a compile time
683+
// constant.
684+
//
685+
// @param numSrc1 the number of GRFs for source-1, which must be a compile time
686+
// constant.
687+
//
688+
// @param sfid the shared function ID, which must be a compile time constant.
689+
//
690+
// @param exDesc the extended message descriptor.
691+
//
692+
// @param msgDesc the message descriptor.
693+
//
694+
// @param msgSrc0 the first source operand of send message.
695+
//
696+
// @param msgSrc1 the second source operand of send message.
697+
//
698+
// Template parameters: Ty1/N1 are the element type and length of msgSrc0,
// Ty2/N2 those of msgSrc1, and N is the length of the predicate storage `pred`
// (16 unless specified). There is no destination operand and no return value.
template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
699+
__ESIMD_INTRIN void
700+
__esimd_raw_sends2_noresult(uint8_t modifier, uint8_t execSize,
701+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
702+
uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid,
703+
uint32_t exDesc, uint32_t msgDesc,
704+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0,
705+
__ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc1)
706+
// When __SYCL_DEVICE_ONLY__ is defined this is a declaration only; otherwise
// the function gets a body that expands __ESIMD_UNSUPPORTED_ON_HOST.
#ifdef __SYCL_DEVICE_ONLY__
707+
;
708+
#else
709+
{
710+
__ESIMD_UNSUPPORTED_ON_HOST;
711+
}
712+
#endif // __SYCL_DEVICE_ONLY__
713+
714+
// \brief Raw send.
715+
//
716+
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
717+
//
718+
// @param execSize the execution size, which must be a compile time constant.
719+
//
720+
// @param pred the predicate to specify enabled channels.
721+
//
722+
// @param numSrc0 the number of GRFs for source-0, which must be a compile time
723+
// constant.
724+
//
725+
// @param sfid the shared function ID, which must be a compile time constant.
726+
//
727+
// @param exDesc the extended message descriptor.
728+
//
729+
// @param msgDesc the message descriptor.
730+
//
731+
// @param msgSrc0 the first source operand of send message.
732+
//
733+
// Template parameters: Ty1/N1 are the element type and length of msgSrc0, and
// N is the length of the predicate storage `pred` (16 unless specified).
// There is no destination operand and no return value.
template <typename Ty1, int N1, int N = 16>
734+
__ESIMD_INTRIN void
735+
__esimd_raw_send2_noresult(uint8_t modifier, uint8_t execSize,
736+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
737+
uint8_t numSrc0, uint8_t sfid, uint32_t exDesc,
738+
uint32_t msgDesc,
739+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0)
740+
// When __SYCL_DEVICE_ONLY__ is defined this is a declaration only; otherwise
// the function gets a body that expands __ESIMD_UNSUPPORTED_ON_HOST.
#ifdef __SYCL_DEVICE_ONLY__
741+
;
742+
#else
743+
{
744+
__ESIMD_UNSUPPORTED_ON_HOST;
745+
}
746+
#endif // __SYCL_DEVICE_ONLY__
747+
584748
/// @endcond ESIMD_DETAIL

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2313,6 +2313,170 @@ scatter_rgba(AccessorT acc, simd<uint32_t, N> offsets,
23132313
__ESIMD_DNS::localAccessorToOffset(acc),
23142314
vals, mask);
23152315
}
2316+
2317+
/// @addtogroup sycl_esimd_raw_send
2318+
/// @{
2319+
2320+
/// Raw sends. "s" suffix designates "split" variant - i.e. two sources.
2321+
/// This is a low-level API not recommended for general usage.
2322+
///
2323+
/// @tparam exec_size is the execution size.
2324+
/// @tparam sfid is the shared function ID.
2325+
/// @tparam num_src0 is the number of GRFs for source-0.
2326+
/// @tparam num_src1 is the number of GRFs for source-1.
2327+
/// @tparam num_dst is the number of GRFs for destination.
2328+
/// @tparam eot is the flag that indicates whether this is an EOT message
2329+
/// (optional - default to off).
2330+
/// @tparam sendc is the flag that indicates whether sendc should be used
2331+
/// (optional - default to off).
2332+
/// @param msg_dst is the old value of the destination operand.
2333+
/// @param msg_src0 is the first source operand of send message.
2334+
/// @param msg_src1 is the second source operand of send message.
2335+
/// @param ex_desc is the extended message descriptor.
2336+
/// @param msg_desc is the message descriptor.
2337+
/// @param mask is the predicate to specify enabled channels (optional - default
2338+
/// to on).
2339+
/// @return the vector value read from memory.
2340+
// Split (two-source) raw send that returns a value. Everything except the
// descriptors, the operands and the mask is a template argument; T1/n1, T2/n2
// and T3/n3 are deduced from msg_dst, msg_src0 and msg_src1 respectively.
template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0, uint8_t num_src1,
2341+
uint8_t num_dst, raw_send_eot eot = raw_send_eot::not_eot,
2342+
raw_send_sendc sendc = raw_send_sendc::not_sendc, typename T1, int n1,
2343+
typename T2, int n2, typename T3, int n3>
2344+
__ESIMD_API __ESIMD_NS::simd<T1, n1>
2345+
raw_sends(__ESIMD_NS::simd<T1, n1> msg_dst, __ESIMD_NS::simd<T2, n2> msg_src0,
2346+
__ESIMD_NS::simd<T3, n3> msg_src1, uint32_t ex_desc,
2347+
uint32_t msg_desc, __ESIMD_NS::simd_mask<exec_size> mask = 1) {
2348+
// Byte size of each operand; each must be a multiple of 32 bytes.
// _Width1 is the size of msg_dst ("rspVar" in the assertion message).
// NOTE(review): 32 bytes presumably corresponds to one GRF - confirm.
constexpr unsigned _Width1 = n1 * sizeof(T1);
2349+
static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
2350+
constexpr unsigned _Width2 = n2 * sizeof(T2);
2351+
static_assert(_Width2 % 32 == 0, "Invalid size for raw send msg_src0");
2352+
constexpr unsigned _Width3 = n3 * sizeof(T3);
2353+
static_assert(_Width3 % 32 == 0, "Invalid size for raw send msg_src1");
2354+
2355+
// Element types handed to the intrinsic are the __raw_t mappings of T1..T3.
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
2356+
using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
2357+
using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
2358+
2359+
// Pack the flags: bit 0 is set for sendc, bit 1 is set for EOT.
constexpr uint8_t modifier =
2360+
((eot == raw_send_eot::eot) << 1) | (sendc == raw_send_sendc::sendc);
2361+
2362+
// exec_size is passed twice: as the intrinsic's last template argument (the
// predicate length) and as its execSize function argument.
return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, exec_size>(
2363+
modifier, exec_size, mask.data(), num_src0, num_src1, num_dst, sfid,
2364+
ex_desc, msg_desc, msg_src0.data(), msg_src1.data(), msg_dst.data());
2365+
}
2366+
2367+
/// Raw send. This is a low-level API not recommended for general usage.
2368+
///
2369+
/// @tparam exec_size is the execution size.
2370+
/// @tparam sfid is the shared function ID.
2371+
/// @tparam num_src0 is the number of GRFs for source-0.
2372+
/// @tparam num_dst is the number of GRFs for destination.
2373+
/// @tparam eot is the flag that indicates whether this is an EOT message
2374+
/// (optional - default to off).
2375+
/// @tparam sendc is the flag that indicates whether sendc should be used
2376+
/// (optional - default to off).
2377+
/// @param msg_dst is the old value of the destination operand.
2378+
/// @param msg_src0 is the first source operand of send message.
2379+
/// @param ex_desc is the extended message descriptor.
2380+
/// @param msg_desc is the message descriptor.
2381+
/// @param mask is the predicate to specify enabled channels (optional - default
2382+
/// to on).
2383+
/// @return the vector value read from memory.
2384+
// Single-source raw send that returns a value. Everything except the
// descriptors, the operands and the mask is a template argument; T1/n1 and
// T2/n2 are deduced from msg_dst and msg_src0 respectively.
template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0, uint8_t num_dst,
2385+
raw_send_eot eot = raw_send_eot::not_eot,
2386+
raw_send_sendc sendc = raw_send_sendc::not_sendc, typename T1, int n1,
2387+
typename T2, int n2>
2388+
__ESIMD_API __ESIMD_NS::simd<T1, n1>
2389+
raw_send(__ESIMD_NS::simd<T1, n1> msg_dst, __ESIMD_NS::simd<T2, n2> msg_src0,
2390+
uint32_t ex_desc, uint32_t msg_desc,
2391+
__ESIMD_NS::simd_mask<exec_size> mask = 1) {
2392+
// Byte size of each operand; each must be a multiple of 32 bytes.
// _Width1 is the size of msg_dst ("rspVar" in the assertion message).
// NOTE(review): 32 bytes presumably corresponds to one GRF - confirm.
constexpr unsigned _Width1 = n1 * sizeof(T1);
2393+
static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
2394+
constexpr unsigned _Width2 = n2 * sizeof(T2);
2395+
static_assert(_Width2 % 32 == 0, "Invalid size for raw send msg_src0");
2396+
2397+
// Element types handed to the intrinsic are the __raw_t mappings of T1, T2.
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
2398+
using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
2399+
2400+
// Pack the flags: bit 0 is set for sendc, bit 1 is set for EOT.
constexpr uint8_t modifier =
2401+
((eot == raw_send_eot::eot) << 1) | (sendc == raw_send_sendc::sendc);
2402+
// exec_size is passed twice: as the intrinsic's last template argument (the
// predicate length) and as its execSize function argument.
return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, exec_size>(
2403+
modifier, exec_size, mask.data(), num_src0, num_dst, sfid, ex_desc,
2404+
msg_desc, msg_src0.data(), msg_dst.data());
2405+
}
2406+
2407+
/// Raw sends. "s" suffix designates "split" variant - i.e. two sources.
2408+
/// This is a low-level API not recommended for general usage.
2409+
///
2410+
/// @tparam exec_size is the execution size.
2411+
/// @tparam sfid is the shared function ID.
2412+
/// @tparam num_src0 is the number of GRFs for source-0.
2413+
/// @tparam num_src1 is the number of GRFs for source-1.
2414+
/// @tparam eot is the flag that indicates whether this is an EOT message
2415+
/// (optional - default to off).
2416+
/// @tparam sendc is the flag that indicates whether sendc should be used
2417+
/// (optional - default to off).
2418+
/// @param msg_src0 is the first source operand of send message.
2419+
/// @param msg_src1 is the second source operand of send message.
2420+
/// @param ex_desc is the extended message descriptor.
2421+
/// @param msg_desc is the message descriptor.
2422+
/// @param mask is the predicate to specify enabled channels (optional - default
2423+
/// to on).
2424+
// Split (two-source) raw send with no destination operand and no result.
// T1/n1 and T2/n2 are deduced from msg_src0 and msg_src1 respectively.
template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0, uint8_t num_src1,
2425+
raw_send_eot eot = raw_send_eot::not_eot,
2426+
raw_send_sendc sendc = raw_send_sendc::not_sendc, typename T1, int n1,
2427+
typename T2, int n2>
2428+
__ESIMD_API void raw_sends(__ESIMD_NS::simd<T1, n1> msg_src0,
2429+
__ESIMD_NS::simd<T2, n2> msg_src1, uint32_t ex_desc,
2430+
uint32_t msg_desc,
2431+
__ESIMD_NS::simd_mask<exec_size> mask = 1) {
2432+
// Byte size of each source operand; each must be a multiple of 32 bytes.
// NOTE(review): 32 bytes presumably corresponds to one GRF - confirm.
constexpr unsigned _Width1 = n1 * sizeof(T1);
2433+
static_assert(_Width1 % 32 == 0, "Invalid size for raw send msg_src0");
2434+
constexpr unsigned _Width2 = n2 * sizeof(T2);
2435+
static_assert(_Width2 % 32 == 0, "Invalid size for raw send msg_src1");
2436+
2437+
// Element types handed to the intrinsic are the __raw_t mappings of T1, T2.
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
2438+
using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
2439+
2440+
// Pack the flags: bit 0 is set for sendc, bit 1 is set for EOT.
constexpr uint8_t modifier =
2441+
((eot == raw_send_eot::eot) << 1) | (sendc == raw_send_sendc::sendc);
2442+
// exec_size is passed twice: as the intrinsic's last template argument (the
// predicate length) and as its execSize function argument.
__esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, exec_size>(
2443+
modifier, exec_size, mask.data(), num_src0, num_src1, sfid, ex_desc,
2444+
msg_desc, msg_src0.data(), msg_src1.data());
2445+
}
2446+
2447+
/// Raw send. Generates a \c send or \c sendc instruction for the message
2448+
/// gateway. This is a low-level API not recommended for general usage.
2449+
///
2450+
/// @tparam exec_size is the execution size.
2451+
/// @tparam sfid is the shared function ID.
2452+
/// @tparam num_src0 is the number of GRFs for source-0.
2453+
/// @tparam eot is the flag that indicates whether this is an EOT message
2454+
/// (optional - default to off).
2455+
/// @tparam sendc is the flag that indicates whether sendc should be used
2456+
/// (optional - default to off).
2457+
/// @param msg_src0 is the first source operand of send message.
2458+
/// @param ex_desc is the extended message descriptor.
2459+
/// @param msg_desc is the message descriptor.
2460+
/// @param mask is the predicate to specify enabled channels (optional - default
2461+
/// to on).
2462+
// Single-source raw send with no destination operand and no result.
// T1/n1 are deduced from msg_src0.
template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0,
2463+
raw_send_eot eot = raw_send_eot::not_eot,
2464+
raw_send_sendc sendc = raw_send_sendc::not_sendc, typename T1, int n1>
2465+
__ESIMD_API void raw_send(__ESIMD_NS::simd<T1, n1> msg_src0, uint32_t ex_desc,
2466+
uint32_t msg_desc,
2467+
__ESIMD_NS::simd_mask<exec_size> mask = 1) {
2468+
// Byte size of the source operand; it must be a multiple of 32 bytes.
// NOTE(review): 32 bytes presumably corresponds to one GRF - confirm.
constexpr unsigned _Width1 = n1 * sizeof(T1);
2469+
static_assert(_Width1 % 32 == 0, "Invalid size for raw send msg_src0");
2470+
// Element type handed to the intrinsic is the __raw_t mapping of T1.
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
2471+
// Pack the flags: bit 0 is set for sendc, bit 1 is set for EOT.
constexpr uint8_t modifier =
2472+
((eot == raw_send_eot::eot) << 1) | (sendc == raw_send_sendc::sendc);
2473+
// exec_size is passed twice: as the intrinsic's last template argument (the
// predicate length) and as its execSize function argument.
__esimd_raw_send2_noresult<ElemT1, n1, exec_size>(
2474+
modifier, exec_size, mask.data(), num_src0, sfid, ex_desc, msg_desc,
2475+
msg_src0.data());
2476+
}
2477+
2478+
/// @} sycl_esimd_raw_send
2479+
23162480
/// @} sycl_esimd_memory
23172481

23182482
/// @cond EXCLUDE

0 commit comments

Comments
 (0)