Skip to content

Commit 972355c

Browse files
wenju-heigcbot
authored andcommitted
Fix SYCL bindless image builtin's user functions not inlined
ProcessFuncAttributes was only marking function that has opaque image argument as alwaysinline. There are two issues exposed in SYCL bindless image: 1. SYCL bindless image handle type could be integer. 2. Function that returns a sampled image should be inlined as well. This PR fixes the two issues.
1 parent f1de9f5 commit 972355c

File tree

2 files changed

+243
-32
lines changed

2 files changed

+243
-32
lines changed

IGC/AdaptorCommon/ProcessFuncAttributes.cpp

Lines changed: 123 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -138,31 +138,31 @@ static void getContainedStructType(Type *T, SmallPtrSetImpl<StructType *> &Tys)
138138
}
139139
}
140140

141+
static bool isImageType(llvm::Type *Ty)
142+
{
143+
if (auto *STy = dyn_cast<StructType>(Ty); STy && STy->isOpaque())
144+
{
145+
auto typeName = STy->getName();
146+
llvm::SmallVector<llvm::StringRef, 3> buf;
147+
typeName.split(buf, ".");
148+
if (buf.size() < 2) return false;
149+
bool isOpenCLImage = buf[0].equals("opencl") && buf[1].startswith("image") && buf[1].endswith("_t");
150+
bool isSPIRVImage = buf[0].equals("spirv") && (buf[1].startswith("Image") || buf[1].startswith("SampledImage"));
151+
152+
if (isOpenCLImage || isSPIRVImage)
153+
return true;
154+
}
155+
return false;
156+
}
157+
141158
// Check the existence of an image type.
142159
static bool containsImageType(llvm::Type *T)
143160
{
144161
// All (nested) struct types in T.
145162
SmallPtrSet<StructType *, 8> StructTys;
146163
getContainedStructType(T, StructTys);
147164

148-
for (auto I = StructTys.begin(), E = StructTys.end(); I != E; ++I)
149-
{
150-
StructType *ST = *I;
151-
if (ST->isOpaque())
152-
{
153-
auto typeName = ST->getName();
154-
llvm::SmallVector<llvm::StringRef, 3> buf;
155-
typeName.split(buf, ".");
156-
if (buf.size() < 2) return false;
157-
bool isOpenCLImage = buf[0].equals("opencl") && buf[1].startswith("image") && buf[1].endswith("_t");
158-
bool isSPIRVImage = buf[0].equals("spirv") && buf[1].startswith("Image");
159-
160-
if (isOpenCLImage || isSPIRVImage)
161-
return true;
162-
}
163-
}
164-
165-
return false;
165+
return llvm::any_of(StructTys, [](StructType *STy) { return isImageType(STy); });
166166
}
167167

168168
static bool isOptNoneBuiltin(StringRef name)
@@ -235,7 +235,7 @@ static DenseSet<Function*> collectMemPoolUsage(const Module &M)
235235
return FuncsToInline;
236236
}
237237

238-
void addFnAttrRecursive(Function* F, StringRef Attr, StringRef Val)
238+
static void addFnAttrRecursive(Function* F, StringRef Attr, StringRef Val)
239239
{
240240
F->addFnAttr(Attr, Val);
241241
for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
@@ -248,6 +248,96 @@ void addFnAttrRecursive(Function* F, StringRef Attr, StringRef Val)
248248
}
249249
}
250250

251+
static void setAlwaysInline(Function* F)
252+
{
253+
F->addFnAttr(llvm::Attribute::AlwaysInline);
254+
F->removeFnAttr(llvm::Attribute::NoInline);
255+
// optnone requires noinline and is incompatible with alwaysinline
256+
F->removeFnAttr(llvm::Attribute::OptimizeNone);
257+
};
258+
259+
static void setAlwaysInlineRecursive(Function* F)
260+
{
261+
if (F->hasFnAttribute(llvm::Attribute::AlwaysInline))
262+
return;
263+
setAlwaysInline(F);
264+
for (auto &I : instructions(F))
265+
{
266+
if (CallInst* CI = dyn_cast<CallInst>(&I))
267+
{
268+
if (Function* Callee = CI->getCalledFunction())
269+
{
270+
setAlwaysInlineRecursive(Callee);
271+
}
272+
}
273+
}
274+
}
275+
276+
static void addAlwaysInlineForImageBuiltinUserFunctions(Module &M)
277+
{
278+
StringRef ImageBuiltinPrefixes[] = {
279+
"__builtin_IB_get_image", "__builtin_IB_get_sampler",
280+
"__builtin_IB_get_snap_wa_reqd", "__builtin_IB_OCL_1d_",
281+
"__builtin_IB_OCL_2d_", "__builtin_IB_OCL_3d_"};
282+
SmallVector<Function *, 16> ImageBuiltins;
283+
SmallVector<Function *, 16> SampledImageFunctions;
284+
for (auto &F : M)
285+
{
286+
if (F.isDeclaration())
287+
{
288+
for (StringRef Prefix : ImageBuiltinPrefixes)
289+
{
290+
if (F.getName().startswith(Prefix))
291+
{
292+
ImageBuiltins.push_back(&F);
293+
break;
294+
}
295+
}
296+
continue;
297+
}
298+
// Check if return type is image.
299+
if (auto *PTy = dyn_cast<PointerType>(F.getReturnType()))
300+
{
301+
if (isImageType(IGCLLVM::getNonOpaquePtrEltTy(PTy)))
302+
{
303+
SampledImageFunctions.push_back(&F);
304+
}
305+
}
306+
}
307+
308+
// Add always inline recursively for user functions of each image builtin.
309+
DenseSet<Function *> Visited;
310+
for (auto *Builtin : ImageBuiltins)
311+
{
312+
SmallVector<Function *, 16> WorkList{Builtin};
313+
Visited.insert(Builtin);
314+
while (!WorkList.empty())
315+
{
316+
Function *F = WorkList.pop_back_val();
317+
for (User *U : F->users())
318+
{
319+
if (auto *CI = dyn_cast<CallInst>(U))
320+
{
321+
auto *Caller = CI->getFunction();
322+
if (Visited.insert(Caller).second)
323+
{
324+
setAlwaysInline(Caller);
325+
WorkList.push_back(Caller);
326+
}
327+
}
328+
}
329+
}
330+
}
331+
332+
// Operand of __builtin_IB_get_image/__builtin_IB_get_sampler could be
333+
// result of a call instruction. The call should be inlined as well,
334+
// otherwise ResolveSampledImageBuiltins isn't able to resolve the two builtins.
335+
for (auto *F : SampledImageFunctions)
336+
{
337+
setAlwaysInlineRecursive(F);
338+
}
339+
}
340+
251341
bool ProcessFuncAttributes::runOnModule(Module& M)
252342
{
253343
MetaDataUtilsWrapper &mduw = getAnalysis<MetaDataUtilsWrapper>();
@@ -282,13 +372,6 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
282372
F->addFnAttr(llvm::Attribute::NoInline);
283373
F->removeFnAttr(llvm::Attribute::AlwaysInline);
284374
};
285-
auto SetAlwaysInline = [](Function* F)->void
286-
{
287-
F->addFnAttr(llvm::Attribute::AlwaysInline);
288-
F->removeFnAttr(llvm::Attribute::NoInline);
289-
// optnone requires noinline and is incompatible with alwaysinline
290-
F->removeFnAttr(llvm::Attribute::OptimizeNone);
291-
};
292375

293376
// Returns true if a function is either import or export and requires external linking
294377
auto NeedsLinking = [](Function* F)->bool
@@ -378,7 +461,7 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
378461
pCtx->type != ShaderType::RAYTRACING_SHADER &&
379462
pCtx->type != ShaderType::COMPUTE_SHADER)
380463
{
381-
SetAlwaysInline(F);
464+
setAlwaysInline(F);
382465
continue;
383466
}
384467
// Set noinline on optnone user functions.
@@ -461,7 +544,7 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
461544
if (!isKernel && (F->getCallingConv() == CallingConv::SPIR_KERNEL))
462545
{
463546
// WA for callable kernels, always inline these.
464-
SetAlwaysInline(F);
547+
setAlwaysInline(F);
465548
continue;
466549
}
467550

@@ -616,7 +699,7 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
616699

617700
if (mustAlwaysInline)
618701
{
619-
SetAlwaysInline(F);
702+
setAlwaysInline(F);
620703
continue;
621704
}
622705

@@ -678,15 +761,15 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
678761
}
679762
else
680763
{
681-
SetAlwaysInline(F);
764+
setAlwaysInline(F);
682765
}
683766
}
684767
}
685768
}
686769
else if (FCtrl == FLAG_FCALL_FORCE_INLINE)
687770
{
688771
// Forced inlining all functions
689-
SetAlwaysInline(F);
772+
setAlwaysInline(F);
690773
}
691774
else
692775
{
@@ -782,7 +865,7 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
782865
{
783866
F->removeFnAttr("referenced-indirectly");
784867
F->removeFnAttr("visaStackCall");
785-
SetAlwaysInline(F);
868+
setAlwaysInline(F);
786869
}
787870
else if (FunctionControlMode == FLAG_FCALL_FORCE_SUBROUTINE)
788871
{
@@ -864,6 +947,14 @@ bool ProcessFuncAttributes::runOnModule(Module& M)
864947
}
865948
}
866949

950+
// Curently, ExtensionArgAnalysis assumes that for all functions that use
951+
// image builtins directly or indirectly, we add alwaysinline attribute.
952+
// For SYCL bindless image, checking function argument isn't enough because
953+
// * image handle could be an integer.
954+
// * image handle could be from a call instead of function argument.
955+
// Therefore, we explore all users functions of image builtin recursively.
956+
addAlwaysInlineForImageBuiltinUserFunctions(M);
957+
867958
return true;
868959
}
869960

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; This software and the related documents are Intel copyrighted materials,
6+
; and your use of them is governed by the express license under which they were
7+
; provided to you ("License"). Unless the License provides otherwise,
8+
; you may not use, modify, copy, publish, distribute, disclose or transmit this
9+
; software or the related documents without Intel's prior written permission.
10+
;
11+
; This software and the related documents are provided as is, with no express or
12+
; implied warranties, other than those that are expressly stated in the License.
13+
;
14+
;============================ end_copyright_notice =============================
15+
16+
; Check alwaysinline attribute is added to following functions:
17+
; 1. users of image builtin functions, e.g. _ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi3EEEE_clES5_
18+
; 2. function that is returning an image type, e.g. _ZN4sycl3_V13ext6oneapi12experimental6detail31convert_handle_to_sampled_imageI14ocl_image3d_roNS3_17spirv_handle_typeEEEDaT0_
19+
20+
; RUN: igc_opt -igc-process-func-attributes -S %s -o - | FileCheck %s
21+
22+
; CHECK: define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi3EEEE_clES5_() [[MD0:#[0-9]+]]
23+
; CHECK: define internal spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10read_imageINS0_3vecIfLi4EEES6_S6_EET_RKNS3_20sampled_image_handleERKT1_({{.*}}) [[MD0]]
24+
; CHECK: define internal spir_func {{.*}} @_ZN4sycl3_V13ext6oneapi12experimental6detail31convert_handle_to_sampled_imageI14ocl_image3d_roNS3_17spirv_handle_typeEEEDaT0_({{.*}}) [[MD0]]
25+
; CHECK: define internal spir_func void @_ZL19__invoke__ImageReadIN4sycl3_V13vecIfLi4EEE32__spirv_SampledImage__image3d_roS3_ET_T0_T1_({{.*}}) [[MD0]]
26+
; CHECK: define internal spir_func {{.*}} @_Z25__spirv_ImageRead_Rfloat4PU3AS140__spirv_SampledImage__void_2_0_0_0_0_0_0Dv4_f({{.*}}) [[MD1:#[0-9]+]]
27+
; CHECK: attributes [[MD0]] = {{.*}} alwaysinline
28+
; CHECK: attributes [[MD1]] = {{.*}} alwaysinline
29+
30+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
31+
target triple = "spir64-unknown-unknown"
32+
33+
%"class.sycl::_V1::vec" = type { <4 x float> }
34+
%"struct.sycl::_V1::ext::oneapi::experimental::sampled_image_handle" = type { %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type" }
35+
%"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type" = type { i64, i64 }
36+
%spirv.SampledImage._void_2_0_0_0_0_0_0 = type opaque
37+
%spirv.Image._void_2_0_0_0_0_0_0 = type opaque
38+
%spirv.Sampler = type opaque
39+
40+
; Function Attrs: noinline nounwind optnone
41+
define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi3EEEE_clES5_() #0 {
42+
entry:
43+
call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10read_imageINS0_3vecIfLi4EEES6_S6_EET_RKNS3_20sampled_image_handleERKT1_(%"class.sycl::_V1::vec" addrspace(4)* noalias align 16 null, %"struct.sycl::_V1::ext::oneapi::experimental::sampled_image_handle" addrspace(4)* align 8 null, %"class.sycl::_V1::vec" addrspace(4)* align 16 null)
44+
ret void
45+
}
46+
47+
; Function Attrs: noinline nounwind optnone
48+
define weak_odr spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10read_imageINS0_3vecIfLi4EEES6_S6_EET_RKNS3_20sampled_image_handleERKT1_(%"class.sycl::_V1::vec" addrspace(4)* noalias align 16 %agg.result, %"struct.sycl::_V1::ext::oneapi::experimental::sampled_image_handle" addrspace(4)* align 8 dereferenceable(16) %imageHandle, %"class.sycl::_V1::vec" addrspace(4)* align 16 dereferenceable(16) %coords) #0 {
49+
entry:
50+
%call = call spir_func %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* @_ZN4sycl3_V13ext6oneapi12experimental6detail31convert_handle_to_sampled_imageI14ocl_image3d_roNS3_17spirv_handle_typeEEEDaT0_(%"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type"* byval(%"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type") align 8 null) #0
51+
call spir_func void @_ZL19__invoke__ImageReadIN4sycl3_V13vecIfLi4EEE32__spirv_SampledImage__image3d_roS3_ET_T0_T1_(%"class.sycl::_V1::vec" addrspace(4)* noalias align 16 %agg.result, %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* %call, %"class.sycl::_V1::vec"* byval(%"class.sycl::_V1::vec") align 16 null) #0
52+
ret void
53+
}
54+
55+
; Function Attrs: noinline nounwind optnone
56+
define linkonce_odr spir_func %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* @_ZN4sycl3_V13ext6oneapi12experimental6detail31convert_handle_to_sampled_imageI14ocl_image3d_roNS3_17spirv_handle_typeEEEDaT0_(%"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type"* byval(%"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type") align 8 %raw_handle) #0 {
57+
entry:
58+
%retval = alloca %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)*, align 8
59+
%retval.ascast = addrspacecast %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)** %retval to %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* addrspace(4)*
60+
%raw_handle.ascast = addrspacecast %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type"* %raw_handle to %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type" addrspace(4)*
61+
%image = getelementptr inbounds %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type", %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type" addrspace(4)* %raw_handle.ascast, i32 0, i32 0
62+
%0 = load i64, i64 addrspace(4)* %image, align 8
63+
%call = call spir_func %spirv.Image._void_2_0_0_0_0_0_0 addrspace(1)* @_Z76__spirv_ConvertHandleToImageINTEL_RPU3AS133__spirv_Image__void_2_0_0_0_0_0_0m(i64 %0)
64+
%sampler = getelementptr inbounds %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type", %"struct.sycl::_V1::ext::oneapi::experimental::spirv_handle_type" addrspace(4)* %raw_handle.ascast, i32 0, i32 1
65+
%1 = load i64, i64 addrspace(4)* %sampler, align 8
66+
%call1 = call spir_func %spirv.Sampler addrspace(2)* @_Z35__spirv_ConvertHandleToSamplerINTELm(i64 %1)
67+
%call2 = call spir_func %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* @_Z20__spirv_SampledImagePU3AS133__spirv_Image__void_2_0_0_0_0_0_0PU3AS215__spirv_Sampler(%spirv.Image._void_2_0_0_0_0_0_0 addrspace(1)* %call, %spirv.Sampler addrspace(2)* %call1)
68+
ret %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* %call2
69+
}
70+
71+
; Function Attrs: noinline nounwind optnone
72+
define hidden spir_func void @_ZL19__invoke__ImageReadIN4sycl3_V13vecIfLi4EEE32__spirv_SampledImage__image3d_roS3_ET_T0_T1_(%"class.sycl::_V1::vec" addrspace(4)* noalias align 16 %agg.result, %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* %Img, %"class.sycl::_V1::vec"* byval(%"class.sycl::_V1::vec") align 16 %Coords) #0 {
73+
entry:
74+
%call1 = call spir_func <4 x float> @_Z25__spirv_ImageRead_Rfloat4PU3AS140__spirv_SampledImage__void_2_0_0_0_0_0_0Dv4_f(%spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* %Img, <4 x float> zeroinitializer)
75+
ret void
76+
}
77+
78+
; Function Attrs: convergent
79+
define dso_local spir_func <4 x float> @_Z25__spirv_ImageRead_Rfloat4PU3AS140__spirv_SampledImage__void_2_0_0_0_0_0_0Dv4_f(%spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* %Image, <4 x float> %Coordinate) #1 {
80+
entry:
81+
%0 = bitcast %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* %Image to i8 addrspace(1)*
82+
%call.i.i = tail call spir_func i64 @__builtin_IB_get_image(i8 addrspace(1)* %0)
83+
%call1.i.i = tail call spir_func i64 @__builtin_IB_get_sampler(i8 addrspace(1)* %0)
84+
%conv2.i.i = trunc i64 %call1.i.i to i32
85+
%call3.i.i = tail call spir_func i32 @__builtin_IB_get_snap_wa_reqd(i32 %conv2.i.i)
86+
%call19.i.i = tail call spir_func <4 x float> @__builtin_IB_OCL_3d_sample_l(i32 0, i32 %conv2.i.i, <4 x float> zeroinitializer, float 0.000000e+00)
87+
ret <4 x float> %call19.i.i
88+
}
89+
90+
declare spir_func %spirv.SampledImage._void_2_0_0_0_0_0_0 addrspace(1)* @_Z20__spirv_SampledImagePU3AS133__spirv_Image__void_2_0_0_0_0_0_0PU3AS215__spirv_Sampler(%spirv.Image._void_2_0_0_0_0_0_0 addrspace(1)*, %spirv.Sampler addrspace(2)*)
91+
92+
declare spir_func %spirv.Sampler addrspace(2)* @_Z35__spirv_ConvertHandleToSamplerINTELm(i64)
93+
94+
declare spir_func %spirv.Image._void_2_0_0_0_0_0_0 addrspace(1)* @_Z76__spirv_ConvertHandleToImageINTEL_RPU3AS133__spirv_Image__void_2_0_0_0_0_0_0m(i64)
95+
96+
declare spir_func i64 @__builtin_IB_get_image(i8 addrspace(1)*)
97+
98+
declare spir_func i64 @__builtin_IB_get_sampler(i8 addrspace(1)*)
99+
100+
declare spir_func i32 @__builtin_IB_get_snap_wa_reqd(i32)
101+
102+
declare spir_func <4 x float> @__builtin_IB_OCL_3d_sample_l(i32, i32, <4 x float>, float)
103+
104+
attributes #0 = { noinline nounwind optnone }
105+
attributes #1 = { convergent }
106+
107+
!spirv.MemoryModel = !{!0}
108+
!spirv.Source = !{!1}
109+
!spirv.Generator = !{!2}
110+
!igc.functions = !{!3}
111+
!IGCMetadata = !{!4}
112+
!opencl.ocl.version = !{!5}
113+
!opencl.spir.version = !{!5}
114+
115+
!0 = !{i32 2, i32 2}
116+
!1 = !{i32 4, i32 100000}
117+
!2 = !{i16 6, i16 14}
118+
!3 = distinct !{null, null}
119+
!4 = !{!"ModuleMD"}
120+
!5 = !{i32 2, i32 0}

0 commit comments

Comments
 (0)