Skip to content

Commit 72ea00a

Browse files
authored
[SYCL][SYCLLowerWGLocalMemoryPass] Don't inline and erase sycl_device function (intel#18660)
Frontend propagates sycl-forceinline attribute to SYCL_EXTERNAL function that directly calls group_local_memory_for_overwrite. In this case, it is incorrect to inline and erase the SYCL_EXTERNAL function. GlobalOptPass will erases group_local_memory_for_overwrite function.
1 parent 43b3d42 commit 72ea00a

File tree

3 files changed

+59
-7
lines changed

3 files changed

+59
-7
lines changed

llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/IR/IRBuilder.h"
1515
#include "llvm/IR/InstIterator.h"
1616
#include "llvm/Pass.h"
17+
#include "llvm/SYCLLowerIR/SYCLUtils.h"
1718
#include "llvm/TargetParser/Triple.h"
1819
#include "llvm/Transforms/Utils/Cloning.h"
1920

@@ -110,7 +111,11 @@ static bool inlineGroupLocalMemoryFunc(Module &M) {
110111
for (auto *U : make_early_inc_range(F->users())) {
111112
auto *CI = cast<CallInst>(U);
112113
auto *Caller = CI->getFunction();
114+
// Frontend propagates sycl-forceinline attribute to SYCL_EXTERNAL
115+
// function which directly calls group_local_memory_for_overwrite.
116+
// Don't inline the SYCL_EXTERNAL function.
113117
if (Caller->hasFnAttribute("sycl-forceinline") &&
118+
!sycl::utils::isSYCLExternalFunction(Caller) &&
114119
Visited.insert(Caller).second)
115120
WorkList.push_back(Caller);
116121
if (F != ALMFunc) {
@@ -119,8 +124,6 @@ static bool inlineGroupLocalMemoryFunc(Module &M) {
119124
assert(Result.isSuccess() && "inlining failed");
120125
}
121126
}
122-
if (F != ALMFunc)
123-
F->eraseFromParent();
124127
}
125128

126129
return !Visited.empty();

llvm/test/SYCLLowerIR/group_local_memory_inline.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ target triple = "spir64-unknown-unknown"
2020
; Function Attrs: alwaysinline
2121
define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_() #0 {
2222
entry:
23-
; CHECK: define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_(
23+
; CHECK-LABEL: define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_(
2424
; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8
2525
; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8
2626
; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8
@@ -36,23 +36,23 @@ entry:
3636
ret void
3737
}
3838

39-
; CHECK-NOT: define {{.*}} @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(
40-
4139
; Function Attrs: alwaysinline
4240
define spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %agg.result, ptr byval(%"class.sycl::_V1::group") align 8 %g) #1 {
4341
entry:
42+
; CHECK-LABEL: define {{.*}} @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(
43+
4444
%AllocatedMem = alloca ptr addrspace(3), align 8
4545
%AllocatedMem.ascast = addrspacecast ptr %AllocatedMem to ptr addrspace(4)
4646
%call = call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 4, i64 4)
4747
store ptr addrspace(3) %call, ptr addrspace(4) %AllocatedMem.ascast, align 8
4848
ret void
4949
}
5050

51-
; CHECK-NOT: define {{.*}} @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_(
52-
5351
; Function Attrs: alwaysinline
5452
define spir_func void @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %agg.result, ptr byval(%"class.sycl::_V1::group") align 8 %g) #1 {
5553
entry:
54+
; CHECK-LABEL: define {{.*}} @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_
55+
5656
%AllocatedMem = alloca ptr addrspace(3), align 8
5757
%AllocatedMem.ascast = addrspacecast ptr %AllocatedMem to ptr addrspace(4)
5858
%call = call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 4, i64 4)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; RUN: opt < %s -passes=sycllowerwglocalmemory -S | FileCheck %s
2+
3+
; `foo` is a SYCL_EXTERNAL function that directly calls `group_local_memory_for_overwrite`.
4+
; Frontend propagates `sycl-forceinline` attribute from `group_local_memory_for_overwrite` to `foo`.
5+
; This test checks that `foo` is not inlined.
6+
7+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
8+
target triple = "spir64-unknown-unknown"
9+
10+
%"class.sycl::_V1::multi_ptr" = type { ptr addrspace(3) }
11+
%"class.sycl::_V1::group" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::id" }
12+
%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" }
13+
%"class.sycl::_V1::detail::array" = type { [3 x i64] }
14+
%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" }
15+
16+
; CHECK: @WGLocalMem = internal addrspace(3) global [0 x i8] poison, align 1
17+
18+
define weak_odr dso_local spir_func void @_Z3fooPPi(ptr addrspace(4) noundef %a) #0 {
19+
entry:
20+
; CHECK-LABEL: define {{.*}} @_Z3fooPPi(
21+
; CHECK: store ptr addrspace(3) @WGLocalMem,
22+
23+
call spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi3EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) null, ptr null)
24+
ret void
25+
}
26+
27+
define linkonce_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi3EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %result, ptr noundef byval(%"class.sycl::_V1::group") align 8 %g) #1 {
28+
entry:
29+
; CHECK-LABEL: define {{.*}} @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi3EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(
30+
31+
%AllocatedMem.ascast = addrspacecast ptr %g to ptr addrspace(4)
32+
%call = call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 0, i64 1)
33+
store ptr addrspace(3) %call, ptr addrspace(4) %AllocatedMem.ascast, align 8
34+
ret void
35+
}
36+
37+
declare spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64, i64)
38+
39+
define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_() {
40+
entry:
41+
; CHECK-LABEL: define {{.*}} @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_(
42+
; CHECK: call spir_func void @_Z3fooPPi(
43+
44+
call spir_func void @_Z3fooPPi(ptr addrspace(4) null)
45+
ret void
46+
}
47+
48+
attributes #0 = { "sycl-forceinline"="true" "sycl-module-id"="group_local_memory_template.cpp" }
49+
attributes #1 = { "sycl-forceinline"="true" }

0 commit comments

Comments
 (0)