Skip to content

Commit 8c3cf96

Browse files
committed
[SYCL][ESIMD] Add new raw_send APIs moving compile time parameters to template arguments
The arguments moved from runtime parameters to template parameters must be compile-time constants. They often become constants after constant folding, but not in every case (for example at -O0), and a non-constant argument leads to an IGC error. Add new APIs that enforce this requirement. Three LSC APIs are also updated to use the new versions; they were already passing constexpr arguments. Signed-off-by: Sarnie, Nick <[email protected]>
1 parent f65f965 commit 8c3cf96

File tree

6 files changed

+378
-10
lines changed

6 files changed

+378
-10
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,15 +501,28 @@ class ESIMDIntrinDescTable {
501501
{"raw.sends2",
502502
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7), a(8), a(9), a(10),
503503
a(11)}}},
504+
{"raw_sends2_constexpr",
505+
{"raw.sends2",
506+
{t(0), t(1), ai1(0), t(2), t(3), t(4), t(5), a(1), a(2), a(3), a(4),
507+
a(5)}}},
504508
{"raw_send2",
505509
{"raw.send2",
506510
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7), a(8), a(9)}}},
511+
{"raw_send2_constexpr",
512+
{"raw.send2",
513+
{t(0), t(1), ai1(0), t(2), t(3), t(4), a(1), a(2), a(3), a(4)}}},
507514
{"raw_sends2_noresult",
508515
{"raw.sends2.noresult",
509516
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7), a(8), a(9)}}},
517+
{"raw_sends2_noresult_constexpr",
518+
{"raw.sends2.noresult",
519+
{t(0), t(1), ai1(0), t(2), t(3), t(4), a(1), a(2), a(3), a(4)}}},
510520
{"raw_send2_noresult",
511521
{"raw.send2.noresult",
512522
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}},
523+
{"raw_send2_noresult_constexpr",
524+
{"raw.send2.noresult",
525+
{t(0), t(1), ai1(0), t(2), t(3), a(1), a(2), a(3)}}},
513526
{"wait", {"dummy.mov", {a(0)}}},
514527
{"dpas2",
515528
{"dpas2", {a(0), a(1), a(2), t(0), t(1), t(2), t(3), t(11), t(12)}}},
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
2+
3+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
4+
target triple = "spir64-unknown-unknown"
5+
6+
; Function Attrs: convergent norecurse mustprogress
7+
; Kernel carrying !sycl_explicit_simd metadata that calls each of the four
; __esimd_raw_send*_constexpr intrinsics once. The FileCheck directive placed
; directly before each call is the llvm.genx.raw.* call expected in the output
; of the LowerESIMD pass.
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() !sycl_explicit_simd !3 {
8+
entry:
9+
; %0..%5 are placeholder vector values that only serve as call operands below.
%0 = add <16 x i16> zeroinitializer, zeroinitializer
10+
%1 = add <8 x i32> zeroinitializer, zeroinitializer
11+
%2 = fadd <16 x float> zeroinitializer, zeroinitializer
12+
%3 = fadd <8 x float> zeroinitializer, zeroinitializer
13+
%4 = add <8 x i16> zeroinitializer, zeroinitializer
14+
%5 = add <8 x i64> zeroinitializer, zeroinitializer
15+
16+
; raw_send2_constexpr with leading template arguments 0, 132, 1, 2, 0: they are
; expected as i8 operands 0, -124 (132 printed as a signed i8), 1, 2, 0, and the
; <16 x i16> predicate is expected as a <16 x i1> operand.
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.raw.send2.v16f32.v16i1.v8i32(i8 0, i8 -124, <16 x i1> %{{[0-9a-zA-Z_.]+}}, i8 1, i8 2, i8 0, i32 10, i32 0, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <16 x float> %{{[0-9a-zA-Z_.]+}})
17+
%6 = call spir_func noundef <16 x float> @_Z27__esimd_raw_send2_constexprILh0ELh132ELh1ELh2ELh0EfLi16EjLi8ELi16EEN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeIT4_XT5_EE4typeENS6_ItXT8_EE4typeEjjNS6_IT6_XT7_EE4typeES9_(<16 x i16> noundef %0, i32 noundef 10, i32 noundef 0, <8 x i32> noundef %1, <16 x float> noundef %2) #1
18+
19+
; raw_send2_noresult_constexpr with leading template arguments 0, 131, 2, 1:
; expected as i8 operands 0, -125 (131 printed as a signed i8), 2, 1.
; CHECK: call void @llvm.genx.raw.send2.noresult.v8i1.v8i32(i8 0, i8 -125, <8 x i1> %{{[0-9a-zA-Z_.]+}}, i8 2, i8 1, i32 10, i32 0, <8 x i32> %{{[0-9a-zA-Z_.]+}})
20+
call spir_func void @_Z36__esimd_raw_send2_noresult_constexprILh0ELh131ELh2ELh1EjLi8ELi8EEvN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeItXT5_EE4typeEjjNS6_IT3_XT4_EE4typeE(<8 x i16> noundef %4, i32 noundef 10, i32 noundef 0, <8 x i32> noundef %1)
21+
22+
; raw_sends2_constexpr with leading template arguments 0, 131, 2, 1, 1, 1:
; expected as i8 operands 0, -125, 2, 1, 1, 1.
; CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.raw.sends2.v8i32.v8i1.v8i64.v8i32(i8 0, i8 -125, <8 x i1> %{{[0-9a-zA-Z_.]+}}, i8 2, i8 1, i8 1, i8 1, i32 0, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %1, <8 x i32> %{{[0-9a-zA-Z_.]+}})
23+
%7 = call spir_func noundef <8 x i32> @_Z28__esimd_raw_sends2_constexprILh0ELh131ELh2ELh1ELh1ELh1EjLi8EmLi8EjLi8ELi8EEN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeIT5_XT6_EE4typeENS6_ItXT11_EE4typeEjjNS6_IT7_XT8_EE4typeENS6_IT9_XT10_EE4typeES9_(<8 x i16> noundef %4, i32 noundef 0, i32 noundef 0, <8 x i64> noundef %5, <8 x i32> noundef %1, <8 x i32> %1)
24+
25+
; raw_sends2_noresult_constexpr with leading template arguments 0, 131, 1, 1, 0:
; expected as i8 operands 0, -125, 1, 1, 0.
; CHECK: call void @llvm.genx.raw.sends2.noresult.v16i1.v8i32.v8f32(i8 0, i8 -125, <16 x i1> %{{[0-9a-zA-Z_.]+}}, i8 1, i8 1, i8 0, i32 10, i32 0, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x float> %{{[0-9a-zA-Z_.]+}})
26+
call spir_func void @_Z37__esimd_raw_sends2_noresult_constexprILh0ELh131ELh1ELh1ELh0EjLi8EfLi8ELi16EEvN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeItXT8_EE4typeEjjNS6_IT4_XT5_EE4typeENS6_IT6_XT7_EE4typeE(<16 x i16> %0, i32 10, i32 0, <8 x i32> %1, <8 x float> %3)
27+
28+
ret void
29+
}
30+
!3 = !{}
31+
32+
; Body-less declarations of the four __esimd_raw_send*_constexpr intrinsics
; called by the kernel in this file.
; NOTE(review): attribute group #1 is referenced on these declarations, but no
; definition of that group is visible in this file - confirm this is intended.
declare dso_local spir_func noundef <16 x float> @_Z27__esimd_raw_send2_constexprILh0ELh132ELh1ELh2ELh0EfLi16EjLi8ELi16EEN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeIT4_XT5_EE4typeENS6_ItXT8_EE4typeEjjNS6_IT6_XT7_EE4typeES9_(<16 x i16> noundef, i32 noundef, i32 noundef, <8 x i32> noundef, <16 x float> noundef) local_unnamed_addr #1
33+
declare dso_local spir_func void @_Z36__esimd_raw_send2_noresult_constexprILh0ELh131ELh2ELh1EjLi8ELi8EEvN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeItXT5_EE4typeEjjNS6_IT3_XT4_EE4typeE(<8 x i16> noundef, i32 noundef, i32 noundef, <8 x i32> noundef) local_unnamed_addr #1
34+
declare dso_local spir_func noundef <8 x i32> @_Z28__esimd_raw_sends2_constexprILh0ELh131ELh2ELh1ELh1ELh1EjLi8EmLi8EjLi8ELi8EEN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeIT5_XT6_EE4typeENS6_ItXT11_EE4typeEjjNS6_IT7_XT8_EE4typeENS6_IT9_XT10_EE4typeES9_(<8 x i16> noundef, i32 noundef, i32 noundef, <8 x i64> noundef, <8 x i32> noundef, <8 x i32> noundef) local_unnamed_addr #1
35+
declare dso_local spir_func void @_Z37__esimd_raw_sends2_noresult_constexprILh0ELh131ELh1ELh1ELh0EjLi8EfLi8ELi16EEvN4sycl3_V13ext5intel5esimd6detail15raw_vector_typeItXT8_EE4typeEjjNS6_IT4_XT5_EE4typeENS6_IT6_XT7_EE4typeE(<16 x i16> noundef, i32 noundef, i32 noundef, <8 x i32> noundef, <8 x float> noundef) local_unnamed_addr #1

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,50 @@ __esimd_raw_sends2(uint8_t modifier, uint8_t execSize,
8080
}
8181
#endif // __SYCL_DEVICE_ONLY__
8282

83+
// \brief Raw sends.
// Variant of __esimd_raw_sends2 in which modifier, execSize, numSrc0, numSrc1,
// numDst and sfid are template arguments rather than function arguments, so
// they have to be compile-time constants.
// When __SYCL_DEVICE_ONLY__ is defined only the declaration is emitted;
// otherwise the body consists of __ESIMD_UNSUPPORTED_ON_HOST.
84+
//
85+
// @tparam modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
86+
//
87+
// @tparam execSize the execution size.
88+
//
89+
// @tparam numSrc0 the number of GRFs for source-0.
90+
//
91+
// @tparam numSrc1 the number of GRFs for source-1.
92+
//
93+
// @tparam numDst the number of GRFs for destination.
94+
//
95+
// @tparam sfid the shared function ID.
//
// @tparam Ty1 the element type of msgDst and of the returned vector.
//
// @tparam N1 the size of msgDst and of the returned vector.
//
// @tparam Ty2 the element type of msgSrc0.
//
// @tparam N2 the size of msgSrc0.
//
// @tparam Ty3 the element type of msgSrc1.
//
// @tparam N3 the size of msgSrc1.
//
// @tparam N the size argument of the simd_mask_storage_t type of pred
// (defaults to 16).
96+
//
97+
// @param pred the predicate to specify enabled channels.
98+
//
99+
// @param exDesc the extended message descriptor.
100+
//
101+
// @param msgDesc the message descriptor.
102+
//
103+
// @param msgSrc0 the first source operand of send message.
104+
//
105+
// @param msgSrc1 the second source operand of send message.
106+
//
107+
// @param msgDst the destination operand of send message.
108+
//
109+
// Returns a simd vector of type Ty1 and size N1.
110+
template <uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t numSrc1,
111+
uint8_t numDst, uint8_t sfid, typename Ty1, int N1, typename Ty2,
112+
int N2, typename Ty3, int N3, int N = 16>
113+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1>
114+
__esimd_raw_sends2_constexpr(__ESIMD_DNS::simd_mask_storage_t<N> pred,
115+
uint32_t exDesc, uint32_t msgDesc,
116+
__ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
117+
__ESIMD_DNS::vector_type_t<Ty3, N3> msgSrc1,
118+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgDst)
119+
#ifdef __SYCL_DEVICE_ONLY__
120+
;
121+
#else
122+
{
123+
__ESIMD_UNSUPPORTED_ON_HOST;
124+
}
125+
#endif // __SYCL_DEVICE_ONLY__
126+
83127
// \brief Raw send.
84128
//
85129
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
@@ -121,6 +165,45 @@ __esimd_raw_send2(uint8_t modifier, uint8_t execSize,
121165
}
122166
#endif // __SYCL_DEVICE_ONLY__
123167

168+
// \brief Raw send.
// Variant of __esimd_raw_send2 in which modifier, execSize, numSrc0, numDst
// and sfid are template arguments rather than function arguments, so they have
// to be compile-time constants.
// When __SYCL_DEVICE_ONLY__ is defined only the declaration is emitted;
// otherwise the body consists of __ESIMD_UNSUPPORTED_ON_HOST.
169+
//
170+
// @tparam modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
171+
//
172+
// @tparam execSize the execution size.
173+
//
174+
// @tparam numSrc0 the number of GRFs for source-0.
175+
//
176+
// @tparam numDst the number of GRFs for destination.
177+
//
178+
// @tparam sfid the shared function ID.
//
// @tparam Ty1 the element type of msgDst and of the returned vector.
//
// @tparam N1 the size of msgDst and of the returned vector.
//
// @tparam Ty2 the element type of msgSrc0.
//
// @tparam N2 the size of msgSrc0.
//
// @tparam N the size argument of the simd_mask_storage_t type of pred
// (defaults to 16).
179+
//
180+
// @param pred the predicate to specify enabled channels.
181+
//
182+
// @param exDesc the extended message descriptor.
183+
//
184+
// @param msgDesc the message descriptor.
185+
//
186+
// @param msgSrc0 the first source operand of send message.
187+
//
188+
// @param msgDst the destination operand of send message.
189+
//
190+
// Returns a simd vector of type Ty1 and size N1.
191+
//
192+
template <uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t numDst,
193+
uint8_t sfid, typename Ty1, int N1, typename Ty2, int N2, int N = 16>
194+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1>
195+
__esimd_raw_send2_constexpr(__ESIMD_DNS::simd_mask_storage_t<N> pred,
196+
uint32_t exDesc, uint32_t msgDesc,
197+
__ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
198+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgDst)
199+
#ifdef __SYCL_DEVICE_ONLY__
200+
;
201+
#else
202+
{
203+
__ESIMD_UNSUPPORTED_ON_HOST;
204+
}
205+
#endif // __SYCL_DEVICE_ONLY__
206+
124207
// \brief Raw sends.
125208
//
126209
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
@@ -161,6 +244,42 @@ __esimd_raw_sends2_noresult(uint8_t modifier, uint8_t execSize,
161244
}
162245
#endif // __SYCL_DEVICE_ONLY__
163246

247+
// \brief Raw sends.
// Variant of __esimd_raw_sends2_noresult in which modifier, execSize, numSrc0,
// numSrc1 and sfid are template arguments rather than function arguments, so
// they have to be compile-time constants. Returns nothing.
// When __SYCL_DEVICE_ONLY__ is defined only the declaration is emitted;
// otherwise the body consists of __ESIMD_UNSUPPORTED_ON_HOST.
248+
//
249+
// @tparam modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
250+
//
251+
// @tparam execSize the execution size.
252+
//
253+
// @tparam numSrc0 the number of GRFs for source-0.
254+
//
255+
// @tparam numSrc1 the number of GRFs for source-1.
256+
//
257+
// @tparam sfid the shared function ID.
//
// @tparam Ty1 the element type of msgSrc0.
//
// @tparam N1 the size of msgSrc0.
//
// @tparam Ty2 the element type of msgSrc1.
//
// @tparam N2 the size of msgSrc1.
//
// @tparam N the size argument of the simd_mask_storage_t type of pred
// (defaults to 16).
258+
//
259+
// @param pred the predicate to specify enabled channels.
260+
//
261+
// @param exDesc the extended message descriptor.
262+
//
263+
// @param msgDesc the message descriptor.
264+
//
265+
// @param msgSrc0 the first source operand of send message.
266+
//
267+
// @param msgSrc1 the second source operand of send message.
268+
//
269+
template <uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t numSrc1,
270+
uint8_t sfid, typename Ty1, int N1, typename Ty2, int N2, int N = 16>
271+
__ESIMD_INTRIN void __esimd_raw_sends2_noresult_constexpr(
272+
__ESIMD_DNS::simd_mask_storage_t<N> pred, uint32_t exDesc, uint32_t msgDesc,
273+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0,
274+
__ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc1)
275+
#ifdef __SYCL_DEVICE_ONLY__
276+
;
277+
#else
278+
{
279+
__ESIMD_UNSUPPORTED_ON_HOST;
280+
}
281+
#endif // __SYCL_DEVICE_ONLY__
282+
164283
// \brief Raw send.
165284
//
166285
// @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
@@ -195,6 +314,37 @@ __esimd_raw_send2_noresult(uint8_t modifier, uint8_t execSize,
195314
}
196315
#endif // __SYCL_DEVICE_ONLY__
197316

317+
// \brief Raw send.
// Variant of __esimd_raw_send2_noresult in which modifier, execSize, numSrc0
// and sfid are template arguments rather than function arguments, so they have
// to be compile-time constants. Returns nothing.
// When __SYCL_DEVICE_ONLY__ is defined only the declaration is emitted;
// otherwise the body consists of __ESIMD_UNSUPPORTED_ON_HOST.
318+
//
319+
// @tparam modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
320+
//
321+
// @tparam execSize the execution size.
322+
//
323+
// @tparam numSrc0 the number of GRFs for source-0.
324+
//
325+
// @tparam sfid the shared function ID.
//
// @tparam Ty1 the element type of msgSrc0.
//
// @tparam N1 the size of msgSrc0.
//
// @tparam N the size argument of the simd_mask_storage_t type of pred
// (defaults to 16).
326+
//
327+
// @param pred the predicate to specify enabled channels.
328+
//
329+
// @param exDesc the extended message descriptor.
330+
//
331+
// @param msgDesc the message descriptor.
332+
//
333+
// @param msgSrc0 the first source operand of send message.
334+
//
335+
template <uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t sfid,
336+
typename Ty1, int N1, int N = 16>
337+
__ESIMD_INTRIN void __esimd_raw_send2_noresult_constexpr(
338+
__ESIMD_DNS::simd_mask_storage_t<N> pred, uint32_t exDesc, uint32_t msgDesc,
339+
__ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0)
340+
#ifdef __SYCL_DEVICE_ONLY__
341+
;
342+
#else
343+
{
344+
__ESIMD_UNSUPPORTED_ON_HOST;
345+
}
346+
#endif // __SYCL_DEVICE_ONLY__
347+
198348
/// Represents named barrier synchronization for a subgroup of threads.
199349
/// Available only on PVC
200350
///

0 commit comments

Comments
 (0)