Skip to content

Commit 1df0038

Browse files
authored
[SYCL][ESIMD] Add support for named barrier APIs (#5583)
* [SYCL][ESIMD] Add support for named barrier APIs Signed-off-by: Sergey Dmitriev <[email protected]>
1 parent beb7277 commit 1df0038

File tree

8 files changed

+220
-7
lines changed

8 files changed

+220
-7
lines changed

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ endif()
1313
if (NOT TARGET LLVMGenXIntrinsics)
1414
if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
1515
set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git)
16-
# commit a9bb6d8040c43404c5fbe3694e59c503d179d19a
17-
# Author: Nikita Rudenko <[email protected]>
18-
# Date: Tue Feb 1 14:57:43 2022 +0000
19-
# Fix attributes are not forwarded for call inst with SEV
20-
set(LLVMGenXIntrinsics_GIT_TAG a9bb6d8040c43404c5fbe3694e59c503d179d19a)
16+
# commit 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f (master)
17+
# Author: Konstantin Vladimirov <[email protected]>
18+
# Date: Tue Feb 8 10:47:03 2022 +0000
19+
# introducing named barrier support in adaptor pass
20+
# named barrier required for DPC++ and other customers
21+
set(LLVMGenXIntrinsics_GIT_TAG 8b6e209fe1269a2c6470b36dfbaa0e051d2a100f)
2122

2223
message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}")
2324
include(FetchContent)

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/Demangle/Demangle.h"
2424
#include "llvm/Demangle/ItaniumDemangle.h"
2525
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
26+
#include "llvm/GenXIntrinsics/GenXMetadata.h"
2627
#include "llvm/IR/IRBuilder.h"
2728
#include "llvm/IR/InstIterator.h"
2829
#include "llvm/IR/Instructions.h"
@@ -445,6 +446,9 @@ class ESIMDIntrinDescTable {
445446
{"raw_send2_noresult",
446447
{"raw.send2.noresult",
447448
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}},
449+
{"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}},
450+
{"raw_send_nbarrier_signal",
451+
{"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}},
448452
{"sat", {"sat", {a(0)}}},
449453
{"fptoui_sat", {"fptoui.sat", {a(0)}}},
450454
{"fptosi_sat", {"fptosi.sat", {a(0)}}},
@@ -885,6 +889,34 @@ static void translateUnPackMask(CallInst &CI) {
885889
CI.replaceAllUsesWith(TransCI);
886890
}
887891

892+
// This function sets VCNamedBarrierCount attribute to set
893+
// the number of named barriers required by a kernel
894+
static void translateNbarrierInit(CallInst &CI) {
895+
auto *F = CI.getFunction();
896+
897+
auto *ArgV = CI.getArgOperand(0);
898+
assert(isa<ConstantInt>(ArgV) &&
899+
"integral constant expected for nbarrier count");
900+
901+
auto NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
902+
assert(NewVal != 0 && "zero nbarrier count being requested");
903+
904+
if (llvm::MDNode *Node = getSLMSizeMDNode(F)) {
905+
if (llvm::Value *OldCount =
906+
getVal(Node->getOperand(genx::KernelMDOp::NBarrierCnt))) {
907+
assert(isa<llvm::ConstantInt>(OldCount) && "integer constant expected");
908+
llvm::Value *NewCount =
909+
llvm::ConstantInt::get(OldCount->getType(), NewVal);
910+
uint64_t OldVal = cast<llvm::ConstantInt>(OldCount)->getZExtValue();
911+
if (OldVal < NewVal)
912+
Node->replaceOperandWith(genx::KernelMDOp::NBarrierCnt,
913+
getMD(NewCount));
914+
}
915+
} else {
916+
llvm_unreachable("esimd_nbarrier_init can only be called by a kernel");
917+
}
918+
}
919+
888920
static bool translateVLoad(CallInst &CI, SmallPtrSet<Type *, 4> &GVTS) {
889921
if (GVTS.find(CI.getType()) != GVTS.end())
890922
return false;
@@ -1406,7 +1438,10 @@ void generateKernelMetadata(Module &M) {
14061438
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // SLM size in bytes
14071439
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // arg offsets
14081440
IOKinds,
1409-
ArgDescs};
1441+
ArgDescs,
1442+
getMD(llvm::ConstantInt::getNullValue(I32Ty)), // named barrier count
1443+
getMD(llvm::ConstantInt::getNullValue(I32Ty)) // regular barrier count
1444+
};
14101445

14111446
// Add this kernel to the root.
14121447
Kernels->addOperand(MDNode::get(Ctx, MDArgs));
@@ -1527,6 +1562,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
15271562
ToErase.push_back(CI);
15281563
continue;
15291564
}
1565+
if (Name.startswith("__esimd_nbarrier_init")) {
1566+
translateNbarrierInit(*CI);
1567+
ToErase.push_back(CI);
1568+
continue;
1569+
}
15301570
if (Name.startswith("__esimd_pack_mask")) {
15311571
translatePackMask(*CI);
15321572
ToErase.push_back(CI);

llvm/test/SYCLLowerIR/ESIMD/acc_ptr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, float addrsp
3030

3131
; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" }
3232
; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]}
33-
; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]]}
33+
; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]], i32 0, i32 0}
3434
; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0}
3535
; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0}
3636
; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; RUN: opt < %s -LowerESIMD -S | FileCheck %s
2+
3+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
4+
target triple = "spir64-unknown-unknown-sycldevice"
5+
6+
; Function Attrs: convergent norecurse mustprogress
7+
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() !sycl_explicit_simd !3 {
8+
entry:
9+
; CHECK: call void @llvm.genx.nbarrier(i8 0, i8 2, i8 0)
10+
call spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext 0, i8 zeroext 2, i8 zeroext 0)
11+
12+
; CHECK: call void @llvm.genx.raw.send.noresult.i1.v8i32(i32 0, i1 true, i32 3, i32 33554436, <8 x i32> <i32 0, i32 0, i32 67371008, i32 0, i32 0, i32 0, i32 0, i32 0>)
13+
call spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32 0, i32 3, i32 33554436, <8 x i32> <i32 0, i32 0, i32 67371008, i32 0, i32 0, i32
14+
0, i32 0, i32 0>, i16 zeroext 1)
15+
16+
ret void
17+
}
18+
!3 = !{}
19+
20+
declare dso_local spir_func void @_Z16__esimd_nbarrierhhh(i8 zeroext, i8 zeroext, i8 zeroext) local_unnamed_addr #1
21+
declare dso_local spir_func void @_Z32__esimd_raw_send_nbarrier_signalIjLi8EEvjjjN2cl4sycl5INTEL3gpu6detail11vector_typeIT_XT0_EE4typeEt(i32, i32, i32, <8 x i32>, i16 zeroext)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; RUN: sycl-post-link -split-esimd -lower-esimd -S %s -o %t.table
2+
; RUN: FileCheck %s -input-file=%t_esimd_0.ll
3+
4+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
5+
target triple = "spir64-unknown-unknown-sycldevice"
6+
7+
; Function Attrs: convergent norecurse mustprogress
8+
define dso_local spir_kernel void @_ZTSZ6calleriE12kernel_esimd() #0 !sycl_explicit_simd !3 {
9+
entry:
10+
tail call spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext 7)
11+
ret void
12+
}
13+
14+
!3 = !{}
15+
16+
declare dso_local spir_func void @_Z21__esimd_nbarrier_inith(i8 zeroext)
17+
; CHECK: attributes #0 = { {{.*}}"VCNamedBarrierCount"="7"{{.*}} }
18+
19+
attributes #0 = { "sycl-module-id"="a.cpp" }

sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,4 +1281,62 @@ __ESIMD_INTRIN void __esimd_raw_send2_noresult(
12811281
}
12821282
#endif // __SYCL_DEVICE_ONLY__
12831283

1284+
/// Represents named barrier synchronization for a subgroup of threads.
1285+
/// Available only on PVC
1286+
///
1287+
/// @param mode - is wait(0) or signal(1)
1288+
///
1289+
/// @param id - barrier id
1290+
///
1291+
/// @param thread_count - number of threads, ignored in 'wait' mode
1292+
__ESIMD_INTRIN void __esimd_nbarrier(uint8_t mode, uint8_t id,
1293+
uint8_t thread_count)
1294+
#ifdef __SYCL_DEVICE_ONLY__
1295+
;
1296+
#else // __SYCL_DEVICE_ONLY__
1297+
{
1298+
throw cl::sycl::feature_not_supported();
1299+
}
1300+
#endif // __SYCL_DEVICE_ONLY__
1301+
1302+
/// Initialize number of named barriers for a kernel
1303+
/// Available only on PVC
1304+
///
1305+
/// @param count - number of named barriers
1306+
__ESIMD_INTRIN void __esimd_nbarrier_init(uint8_t count)
1307+
#ifdef __SYCL_DEVICE_ONLY__
1308+
;
1309+
#else // __SYCL_DEVICE_ONLY__
1310+
{
1311+
throw cl::sycl::feature_not_supported();
1312+
}
1313+
#endif // __SYCL_DEVICE_ONLY__
1314+
1315+
/// Raw send signal to perform signal operation on named barriers
1316+
/// Available only on PVC
1317+
/// @tparam Ty - message element type
1318+
///
1319+
/// @tparam N - message length
1320+
///
1321+
/// @param is_sendc - is sendc
1322+
///
1323+
/// @param extended_descriptor - extended message descriptor
1324+
///
1325+
/// @param descriptor - message descriptor
1326+
///
1327+
/// @param msg_var - source operand of send message
1328+
///
1329+
/// @param pred - predicate for enabled channels
1330+
template <typename Ty, int N>
1331+
__ESIMD_INTRIN void __esimd_raw_send_nbarrier_signal(
1332+
uint32_t is_sendc, uint32_t extended_descriptor, uint32_t descriptor,
1333+
__SEIEED::vector_type_t<Ty, N> msg_var, uint16_t pred = 1)
1334+
#ifdef __SYCL_DEVICE_ONLY__
1335+
;
1336+
#else // __SYCL_DEVICE_ONLY__
1337+
{
1338+
throw cl::sycl::feature_not_supported();
1339+
}
1340+
#endif // __SYCL_DEVICE_ONLY__
1341+
12841342
/// @endcond ESIMD_DETAIL

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,8 +1221,63 @@ raw_send_store(simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
12211221
numSrc0, sfid, exDesc, msgDesc,
12221222
msgSrc0.data());
12231223
}
1224+
12241225
/// @} sycl_esimd_raw_send
12251226

1227+
/// @defgroup sycl_esimd_memory_nbarrier Named barrier APIs.
1228+
/// @ingroup sycl_esimd_memory
1229+
1230+
/// @addtogroup sycl_esimd_memory_nbarrier
1231+
/// @{
1232+
1233+
/// Wait on a named barrier
1234+
/// Available only on PVC
1235+
///
1236+
/// @param id - named barrier id
1237+
__ESIMD_API void nbarrier_wait(uint8_t id) {
1238+
__esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
1239+
}
1240+
1241+
/// Initialize number of named barriers for a kernel
1242+
/// Available only on PVC
1243+
///
1244+
/// @tparam NbarCount - number of named barriers
1245+
template <uint8_t NbarCount> __ESIMD_API void nbarrier_init() {
1246+
__esimd_nbarrier_init(NbarCount);
1247+
}
1248+
1249+
/// Perform signal operation for the given named barrier
1250+
/// Available only on PVC
1251+
///
1252+
/// @param barrier_id - named barrier id
1253+
///
1254+
/// @param producer_consumer_mode - 2-bit flag to indicate if it's producer
1255+
/// mode (0x1) or consumer mode (0x2). User must ensure the input value is set
1256+
/// correctly and higher order bits are cleared.
1257+
///
1258+
/// @param num_producers - number of producers
1259+
///
1260+
/// @param num_consumers - number of consumers
1261+
__ESIMD_API void nbarrier_signal(uint8_t barrier_id,
1262+
uint8_t producer_consumer_mode,
1263+
uint32_t num_producers,
1264+
uint32_t num_consumers) {
1265+
constexpr uint32_t gateway = 3;
1266+
constexpr uint32_t barrier = 4;
1267+
constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register
1268+
0 << 12 | // Fence Data Ports: No fence
1269+
barrier; // Barrier subfunction
1270+
1271+
detail::vector_type_t<uint32_t, 8> payload = 0;
1272+
payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
1273+
producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;
1274+
1275+
__esimd_raw_send_nbarrier_signal<uint32_t, 8>(
1276+
0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
1277+
}
1278+
1279+
/// @} sycl_esimd_memory_nbarrier
1280+
12261281
#undef __ESIMD_GET_SURF_HANDLE
12271282

12281283
/// @cond EXCLUDE

sycl/test/esimd/nbarriers.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t
2+
3+
#include <CL/sycl.hpp>
4+
#include <sycl/ext/intel/experimental/esimd.hpp>
5+
6+
using namespace sycl::ext::intel::experimental::esimd;
7+
8+
template <typename name, typename Func>
9+
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
10+
kernelFunc();
11+
}
12+
13+
void caller(int x) {
14+
kernel<class kernel_esimd>([=]() SYCL_ESIMD_KERNEL {
15+
nbarrier_init<7>();
16+
nbarrier_wait(2);
17+
nbarrier_signal(0, 0, 4, 4);
18+
});
19+
}

0 commit comments

Comments
 (0)