Skip to content

Commit 89327e0

Browse files
authored
[SYCL][ESIMD] Require inlining of some noinline functions due to VC limitation (#12440)
We need to inline some `nolinine` functions because VC doesn't support debugging/`O0`/`-g` in order to at least make user code do the right thing. Otherwise, we get wrong answers or GPU hangs. This change fixes 4 `-O0`/`-fno-inline-functions` test failures. --------- Signed-off-by: Nick Sarnie <[email protected]> Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 7b62154 commit 89327e0

File tree

4 files changed

+84
-4
lines changed

4 files changed

+84
-4
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1737,6 +1737,42 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {
17371737
F.addFnAttr(llvm::Attribute::NoInline);
17381738
};
17391739

1740+
bool ModuleContainsGenXVolatile =
1741+
std::any_of(M.global_begin(), M.global_end(), [](const auto &Global) {
1742+
return Global.hasAttribute("genx_volatile");
1743+
});
1744+
1745+
auto requiresInlining = [=](Function &F) {
1746+
// If there are any genx_volatile globals in the module, inline
1747+
// noinline functions because load/store semantics are not valid for
1748+
// these globals and we cannot know for sure if the load/store target
1749+
// is one of these globals without inlining.
1750+
if (ModuleContainsGenXVolatile)
1751+
return true;
1752+
1753+
// Otherwise, only inline esimd namespace functions.
1754+
StringRef MangledName = F.getName();
1755+
id::ManglingParser<SimpleAllocator> Parser(MangledName.begin(),
1756+
MangledName.end());
1757+
id::Node *AST = Parser.parse();
1758+
if (!AST || AST->getKind() != id::Node::KFunctionEncoding)
1759+
return false;
1760+
1761+
auto *FE = static_cast<id::FunctionEncoding *>(AST);
1762+
const id::Node *NameNode = FE->getName();
1763+
if (!NameNode)
1764+
return false;
1765+
1766+
if (NameNode->getKind() == id::Node::KLocalName)
1767+
return false;
1768+
1769+
id::OutputBuffer NameBuf;
1770+
NameNode->print(NameBuf);
1771+
StringRef Name(NameBuf.getBuffer(), NameBuf.getCurrentPosition());
1772+
1773+
return Name.starts_with("sycl::_V1::ext::intel::esimd::") ||
1774+
Name.starts_with("sycl::_V1::ext::intel::experimental::esimd::");
1775+
};
17401776
bool NeedInline = false;
17411777
for (auto &F : M) {
17421778
// If some function already has 'alwaysinline' attribute, then request
@@ -1773,7 +1809,7 @@ bool SYCLLowerESIMDPass::prepareForAlwaysInliner(Module &M) {
17731809
// it had noinline or VCStackCall attrubute.
17741810
// This code migrated to here without changes, but... VC BE does support
17751811
// the calls of spir_func these days, so this code needs re-visiting.
1776-
if (!F.hasFnAttribute(Attribute::NoInline))
1812+
if (!F.hasFnAttribute(Attribute::NoInline) || requiresInlining(F))
17771813
NeedInline |= markAlwaysInlined(F);
17781814

17791815
if (!isSlmInit(F))
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; This test locks down that assert functions are still noinline even if a
2+
; genx_volatile global is present.
3+
;
4+
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
5+
6+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
7+
target triple = "spir64-unknown-unknown"
8+
9+
%"class.sycl::_V1::ext::intel::esimd::simd" = type { %"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" }
10+
%"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" = type { <16 x float> }
11+
12+
@va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0
13+
14+
define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) {
15+
; CHECK: define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) #[[#ATTR:]] {
16+
ret void
17+
}
18+
19+
define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) {
20+
; CHECK: define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) #[[#ATTR]] {
21+
ret void
22+
}
23+
24+
; CHECK: attributes #[[#ATTR]] = { noinline }
25+
attributes #0 = { "genx_byte_offset"="192" "genx_volatile" }
26+
!0 = !{}

llvm/test/SYCLLowerIR/ESIMD/force_inline.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,22 @@ define dso_local spir_kernel void @KERNEL(ptr addrspace(4) %ptr) !sycl_explicit_
3737
ret void
3838
}
3939

40+
; Function with "noinline" attribute must be marked with "alwaysinline" if it is an ESIMD namespace function
41+
define dso_local spir_func void @_ZNK4sycl3_V13ext5intel5esimd6detail13simd_obj_implIiLi16ENS3_4simdIiLi16EEEvE4dataEv(ptr addrspace(4) %ptr) #1 {
42+
; CHECK: define dso_local spir_func void @_ZNK4sycl3_V13ext5intel5esimd6detail13simd_obj_implIiLi16ENS3_4simdIiLi16EEEvE4dataEv(ptr addrspace(4) %ptr) #[[ATTRS1]] {
43+
ret void
44+
}
45+
46+
; assert functions must not be marked with "alwaysinline"
47+
define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) {
48+
; CHECK: define dso_local spir_func void @__assert_fail(ptr addrspace(4) %ptr) #[[ATTRS3]] {
49+
ret void
50+
}
51+
52+
define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) {
53+
; CHECK: define dso_local spir_func void @__devicelib_assert_fail(ptr addrspace(4) %ptr) #[[ATTRS3]] {
54+
ret void
55+
}
4056

4157
attributes #0 = { "VCStackCall" }
4258
attributes #1 = { noinline }

llvm/test/SYCLLowerIR/ESIMD/lower_global_stores.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; This test checks whether global stores are converted to vstores
22
;
3-
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
3+
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck --implicit-check-not=noinline %s
44

55
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
66
target triple = "spir64-unknown-unknown"
@@ -11,7 +11,8 @@ target triple = "spir64-unknown-unknown"
1111
@va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0
1212
@vb = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0
1313

14-
define weak_odr dso_local spir_kernel void @foo() {
14+
define weak_odr dso_local spir_kernel void @foo() #1 {
15+
; CHECK: define weak_odr dso_local spir_kernel void @foo() #[[#ATTR:]] {
1516
%1 = call <16 x float> asm "", "=rw"()
1617
; CHECK: call void @llvm.genx.vstore.v16f32.p0(<16 x float> %1, ptr @va)
1718
store <16 x float> %1, ptr @va
@@ -21,4 +22,5 @@ ret void
2122
}
2223

2324
attributes #0 = { "genx_byte_offset"="0" "genx_volatile" }
24-
attributes #1 = { "" }
25+
; CHECK: attributes #[[#ATTR]] = { alwaysinline }
26+
attributes #1 = { noinline }

0 commit comments

Comments
 (0)