Skip to content

Commit 8138d0a

Browse files
ppogotovigcbot
authored and committed
Adding AlwaysInlinerLegacyPass to the unify compile step.
Add Always Inliner Pass to inline functions that had problems during the previous inline attempt.
1 parent bce2e7b commit 8138d0a

File tree

3 files changed

+153
-0
lines changed

3 files changed

+153
-0
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,11 @@ static void CommonOCLBasedPasses(
500500

501501
mpm.add(createGASRetValuePropagatorPass());
502502

503+
// This pass inlines the remaining always-inline functions that had issues
504+
// with argument address spaces (byval addrspace(4)) in the previous inlining attempt.
505+
mpm.add(createAlwaysInlinerLegacyPass());
506+
mpm.add(new PurgeMetaDataUtils());
507+
503508
// Run another round of constant breaking as GAS resolving may generate constants (constant address)
504509
mpm.add(new BreakConstantExpr());
505510
}

IGC/common/MDFrameWork.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,9 +284,25 @@ MDNode* CreateNode(const MapVector<Key, Value> &FuncMD, Module* module, StringRe
284284
int i = 0;
285285
for ( auto it = FuncMD.begin(); it != FuncMD.end(); ++it)
286286
{
287+
// Verify that the function was not removed by the inline pass.
288+
// If it was removed, this code could otherwise try to access invalid metadata for it.
289+
if (name == "FuncMD")
290+
{
291+
auto& functionList = module->getFunctionList();
292+
llvm::Module::FunctionListType::iterator funcIterator = std::find_if(functionList.begin(), functionList.end(), [&it](Function& f)
293+
{
294+
return &f == dyn_cast<Function>(it->first);
295+
});
296+
if (funcIterator == functionList.end())
297+
{
298+
continue;
299+
}
300+
}
301+
287302
nodes.push_back(CreateNode(it->first, module, name.str() + "Map[" + std::to_string(i) + "]"));
288303
nodes.push_back(CreateNode(it->second, module, name.str() + "Value[" +std::to_string(i++) + "]"));
289304
}
305+
290306
MDNode* node = MDNode::get(module->getContext(), nodes);
291307
return node;
292308
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys,pvc-supported,llvm-14-plus
10+
11+
; RUN: llvm-as %s -o %t.bc
12+
; RUN: ocloc compile -llvm_input -file %t.bc -device pvc -options "-igc_opts 'DisableRecompilation=1 PrintToConsole=1 PrintBefore=igc-image-sampler-resolution''" 2>&1 | FileCheck %s --check-prefixes=CHECK
13+
14+
; CHECK-LABEL: @test_kernel(
15+
; CHECK-NOT: call spir_func void @_ZNSt7complexIdEC2ECd
16+
; CHECK: ret void
17+
18+
; This test checks whether functions with byval arguments were inlined during unification passes.
19+
; igc-image-sampler-solve was chosen because it is only used in unify.
20+
21+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
22+
target triple = "spir64-unknown-unknown"
23+
24+
%"class.std::complex" = type { %structtype.4 }
25+
%structtype.4 = type { double, double }
26+
27+
; Function Attrs: nofree nosync nounwind willreturn
28+
declare i8* @llvm.stacksave() #0
29+
30+
; Function Attrs: nofree nosync nounwind willreturn
31+
declare void @llvm.stackrestore(i8*) #0
32+
33+
; Function Attrs: noinline nounwind
34+
define spir_kernel void @test_kernel(%"class.std::complex" addrspace(1)* noalias %0, %"class.std::complex" addrspace(1)* noalias %1, %"class.std::complex" addrspace(1)* %2, i64 %3) #1 !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 !kernel_arg_type_qual !3 !kernel_arg_base_type !2 !kernel_arg_name !4 !spirv.ParameterDecorations !5 {
35+
%5 = alloca %"class.std::complex", align 8
36+
%6 = alloca %structtype.4, align 8
37+
%7 = alloca %structtype.4, align 8
38+
%8 = alloca %structtype.4, align 8
39+
%9 = alloca %structtype.4, align 8
40+
%10 = addrspacecast %"class.std::complex"* %5 to %"class.std::complex" addrspace(4)*
41+
%11 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(1)* %0, i64 0, i32 0, i32 0
42+
%12 = load double, double addrspace(1)* %11, align 8
43+
%13 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(1)* %0, i64 0, i32 0, i32 1
44+
%14 = load double, double addrspace(1)* %13, align 8
45+
%15 = shl i64 %3, 9
46+
%16 = and i64 %15, 4294966784
47+
%17 = icmp eq i64 %16, 0
48+
br i1 %17, label %50, label %18
49+
50+
18: ; preds = %4
51+
%19 = addrspacecast %structtype.4* %6 to %structtype.4 addrspace(4)*
52+
%20 = addrspacecast %structtype.4* %7 to %structtype.4 addrspace(4)*
53+
%21 = addrspacecast %structtype.4* %8 to %structtype.4 addrspace(4)*
54+
%22 = addrspacecast %structtype.4* %9 to %structtype.4 addrspace(4)*
55+
%23 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(1)* %1, i64 0, i32 0, i32 0
56+
%24 = load double, double addrspace(1)* %23, align 8
57+
%25 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(1)* %1, i64 0, i32 0, i32 1
58+
%26 = load double, double addrspace(1)* %25, align 8
59+
%27 = getelementptr inbounds %structtype.4, %structtype.4* %7, i64 0, i32 0
60+
%28 = getelementptr inbounds %structtype.4, %structtype.4* %7, i64 0, i32 1
61+
%29 = getelementptr inbounds %structtype.4, %structtype.4* %8, i64 0, i32 0
62+
%30 = getelementptr inbounds %structtype.4, %structtype.4* %8, i64 0, i32 1
63+
%31 = getelementptr inbounds %structtype.4, %structtype.4* %6, i64 0, i32 0
64+
%32 = getelementptr inbounds %structtype.4, %structtype.4* %6, i64 0, i32 1
65+
%33 = getelementptr inbounds %structtype.4, %structtype.4* %9, i64 0, i32 0
66+
%34 = getelementptr inbounds %structtype.4, %structtype.4* %9, i64 0, i32 1
67+
%35 = getelementptr inbounds %"class.std::complex", %"class.std::complex"* %5, i64 0, i32 0, i32 0
68+
%36 = getelementptr inbounds %"class.std::complex", %"class.std::complex"* %5, i64 0, i32 0, i32 1
69+
br label %37
70+
71+
37: ; preds = %37, %18
72+
%38 = phi i64 [ 0, %18 ], [ %48, %37 ]
73+
%39 = phi double [ %14, %18 ], [ %47, %37 ]
74+
%40 = phi double [ %12, %18 ], [ %46, %37 ]
75+
%41 = call i8* @llvm.stacksave()
76+
store double %24, double* %27, align 8
77+
store double %26, double* %28, align 8
78+
store double %40, double* %29, align 8
79+
store double %39, double* %30, align 8
80+
%42 = load double, double* %31, align 8
81+
%43 = load double, double* %32, align 8
82+
store double %42, double* %33, align 8
83+
store double %43, double* %34, align 8
84+
call spir_func void @_ZNSt7complexIdEC2ECd(%"class.std::complex" addrspace(4)* align 8 %10, %structtype.4 addrspace(4)* byval(%structtype.4) align 8 %22) #2
85+
call void @llvm.stackrestore(i8* %41)
86+
%44 = load double, double* %35, align 8
87+
%45 = load double, double* %36, align 8
88+
%46 = fadd fast double %40, %44
89+
%47 = fadd fast double %39, %45
90+
%48 = add nuw nsw i64 %38, 1
91+
%49 = icmp eq i64 %48, %16
92+
br i1 %49, label %.loopexit, label %37
93+
94+
.loopexit: ; preds = %37
95+
br label %50
96+
97+
50: ; preds = %.loopexit, %4
98+
%51 = phi double [ %12, %4 ], [ %46, %.loopexit ]
99+
%52 = phi double [ %14, %4 ], [ %47, %.loopexit ]
100+
%53 = getelementptr %"class.std::complex", %"class.std::complex" addrspace(1)* %2, i64 0, i32 0, i32 0
101+
store double %51, double addrspace(1)* %53, align 8
102+
%54 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(1)* %2, i64 0, i32 0, i32 1
103+
store double %52, double addrspace(1)* %54, align 8
104+
ret void
105+
}
106+
107+
; Function Attrs: nounwind
108+
define linkonce_odr spir_func void @_ZNSt7complexIdEC2ECd(%"class.std::complex" addrspace(4)* align 8 %0, %structtype.4 addrspace(4)* byval(%structtype.4) align 8 %1) #2 {
109+
%3 = getelementptr inbounds %structtype.4, %structtype.4 addrspace(4)* %1, i64 0, i32 0
110+
%4 = load double, double addrspace(4)* %3, align 8
111+
%5 = getelementptr inbounds %structtype.4, %structtype.4 addrspace(4)* %1, i64 0, i32 1
112+
%6 = load double, double addrspace(4)* %5, align 8
113+
%7 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(4)* %0, i64 0, i32 0, i32 0
114+
store double %4, double addrspace(4)* %7, align 8
115+
%8 = getelementptr inbounds %"class.std::complex", %"class.std::complex" addrspace(4)* %0, i64 0, i32 0, i32 1
116+
store double %6, double addrspace(4)* %8, align 8
117+
ret void
118+
}
119+
120+
attributes #0 = { nofree nosync nounwind willreturn }
121+
attributes #1 = { noinline nounwind }
122+
attributes #2 = { nounwind }
123+
124+
!0 = !{i32 1, i32 1, i32 1, i32 0}
125+
!1 = !{!"none", !"none", !"none", !"none"}
126+
!2 = !{!"class.std::complex*", !"class.std::complex*", !"class.std::complex*", !"long"}
127+
!3 = !{!"restrict", !"restrict", !"", !""}
128+
!4 = !{!"", !"", !"", !""}
129+
!5 = !{!6, !6, !8, !8}
130+
!6 = !{!7}
131+
!7 = !{i32 38, i32 4}
132+
!8 = !{}

0 commit comments

Comments
 (0)