Commit 142c28f

[AMDGPU] Modify adjustInliningThreshold to also consider the cost of passing function arguments through the stack
This fixes a regression from when the new pass manager was enabled by default: functions with a large number of instructions are not inlined, but the cost model does not account for the cost of passing arguments over the stack when a function has many arguments. This patch adds a heuristic for AMDGPU's function calling convention that also considers function arguments passed through the stack.

Reviewed By: #amdgpu, arsenm

Differential Revision: https://reviews.llvm.org/D140242
Parent: f03b895

7 files changed: +836 -3 lines

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 53 additions & 3 deletions
@@ -17,8 +17,10 @@
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/PatternMatch.h"
@@ -1167,10 +1169,57 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
   return true;
 }
 
+static unsigned adjustInliningThresholdUsingCallee(const Function *Callee,
+                                                   const SITargetLowering *TLI,
+                                                   const GCNTTIImpl *TTIImpl) {
+  const int NrOfSGPRUntilSpill = 26;
+  const int NrOfVGPRUntilSpill = 32;
+
+  const DataLayout &DL = TTIImpl->getDataLayout();
+
+  unsigned adjustThreshold = 0;
+  int SGPRsInUse = 0;
+  int VGPRsInUse = 0;
+  for (const Argument &A : Callee->args()) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(*TLI, DL, A.getType(), ValueVTs);
+    for (auto ArgVT : ValueVTs) {
+      unsigned CCRegNum = TLI->getNumRegistersForCallingConv(
+          A.getContext(), Callee->getCallingConv(), ArgVT);
+      if (AMDGPU::isArgPassedInSGPR(&A))
+        SGPRsInUse += CCRegNum;
+      else
+        VGPRsInUse += CCRegNum;
+    }
+  }
+
+  // The cost of passing a function argument through the stack:
+  // 1 instruction to put the argument on the stack in the caller,
+  // 1 instruction to take the argument from the stack in the callee,
+  // and 1 instruction to explicitly take care of data dependencies in the
+  // callee.
+  InstructionCost ArgStackCost(1);
+  ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
+      Instruction::Store, Type::getInt32Ty(Callee->getContext()), Align(4),
+      AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
+  ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
+      Instruction::Load, Type::getInt32Ty(Callee->getContext()), Align(4),
+      AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
+
+  // The penalty cost is computed relative to the cost of instructions and
+  // does not model any storage costs.
+  adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
+                     *ArgStackCost.getValue() * InlineConstants::getInstrCost();
+  adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
+                     *ArgStackCost.getValue() * InlineConstants::getInstrCost();
+  return adjustThreshold;
+}
+
 unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   // If we have a pointer to a private array passed into a function
   // it will not be optimized out, leaving scratch usage.
   // Increase the inline threshold to allow inlining in this case.
+  unsigned adjustThreshold = 0;
   uint64_t AllocaSize = 0;
   SmallPtrSet<const AllocaInst *, 8> AIVisited;
   for (Value *PtrArg : CB->args()) {
@@ -1192,9 +1241,10 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
       }
     }
   }
-  if (AllocaSize)
-    return ArgAllocaCost;
-  return 0;
+  adjustThreshold +=
+      adjustInliningThresholdUsingCallee(CB->getCalledFunction(), TLI, this);
+  adjustThreshold += AllocaSize ? ArgAllocaCost : AllocaSize;
+  return adjustThreshold;
 }
 
 void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
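
Not part of the commit, but to make the penalty arithmetic concrete, here is a minimal standalone sketch that recomputes the threshold bonus outside of LLVM. The helper name stackArgThresholdBonus is invented for illustration; ArgStackCost = 3 assumes the private-address store and load each cost 1 on top of the base cost of 1 (the real values come from getMemoryOpCost), and InstrCost = 5 matches both the default and the -inline-instr-cost=5 flag the new tests pass.

// Standalone sketch of the stack-argument penalty computed by
// adjustInliningThresholdUsingCallee above (assumed constants, see lead-in).
#include <algorithm>
#include <cstdio>

static unsigned stackArgThresholdBonus(int SGPRsInUse, int VGPRsInUse) {
  const int NrOfSGPRUntilSpill = 26; // register budgets from the patch
  const int NrOfVGPRUntilSpill = 32;
  const int ArgStackCost = 3; // assumption: base 1 + unit store + unit load
  const int InstrCost = 5;    // assumption: -inline-instr-cost=5
  unsigned Bonus = 0;
  // Only registers past the budget pay the stack penalty.
  Bonus += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) * ArgStackCost * InstrCost;
  Bonus += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) * ArgStackCost * InstrCost;
  return Bonus;
}

int main() {
  // 16 i64 args split into 32 x 32-bit VGPRs: exactly at the budget, no bonus.
  printf("16 x i64 -> bonus %u\n", stackArgThresholdBonus(0, 32)); // 0
  // 17 i64 args -> 34 VGPRs: two registers over, bonus = 2 * 3 * 5 = 30.
  printf("17 x i64 -> bonus %u\n", stackArgThresholdBonus(0, 34)); // 30
  return 0;
}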
Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-cost-full=true -inline-threshold=0 -inline-instr-cost=5 -inline-call-penalty=0 -debug-only=inline < %s 2>&1 | FileCheck %s
+
+; CHECK: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %noinlinecall1 = call noundef i64 @non_inlining_call
+; CHECK: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %noinlinecall2 = call noundef i64 @non_inlining_call
+; CHECK-NOT: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %inlinecall1 = call noundef i64 @inlining_call
+; CHECK-NOT: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %inlinecall2 = call noundef i64 @inlining_call
+
+define noundef i64 @non_inlining_call(i64 noundef %a0, i64 noundef %b0, i64 noundef %c0, i64 noundef %d0, i64 noundef %e0, i64 noundef %f0, i64 noundef %g0, i64 noundef %h0, i64 noundef %i0, i64 noundef %j0, i64 noundef %k0, i64 noundef %l0, i64 noundef %m0, i64 noundef %n0, i64 noundef %o0, i64 noundef %p0) {
+entry:
+  %xor = xor i64 %a0, %b0
+  %xor1 = xor i64 %xor, %c0
+  %xor2 = xor i64 %xor1, %d0
+  %xor3 = xor i64 %xor2, %e0
+  %xor4 = xor i64 %xor3, %f0
+  %xor5 = xor i64 %xor4, %g0
+  %xor6 = xor i64 %xor5, %h0
+  %xor7 = xor i64 %xor6, %i0
+  %xor8 = xor i64 %xor7, %j0
+  %xor9 = xor i64 %xor8, %k0
+  %xor10 = xor i64 %xor9, %l0
+  %xor11 = xor i64 %xor10, %m0
+  %xor12 = xor i64 %xor11, %n0
+  %xor13 = xor i64 %xor12, %o0
+  %xor14 = xor i64 %xor13, %p0
+  %xor15 = xor i64 %xor14, 1
+  %xor16 = xor i64 %xor15, 2
+  ret i64 %xor16
+}
+
+define noundef i64 @inlining_call(i64 noundef %a0, i64 noundef %b0, i64 noundef %c0, i64 noundef %d0, i64 noundef %e0, i64 noundef %f0, i64 noundef %g0, i64 noundef %h0, i64 noundef %i0, i64 noundef %j0, i64 noundef %k0, i64 noundef %l0, i64 noundef %m0, i64 noundef %n0, i64 noundef %o0, i64 noundef %p0, i64 noundef %q0) {
+entry:
+  %xor = xor i64 %a0, %b0
+  %xor1 = xor i64 %xor, %c0
+  %xor2 = xor i64 %xor1, %d0
+  %xor3 = xor i64 %xor2, %e0
+  %xor4 = xor i64 %xor3, %f0
+  %xor5 = xor i64 %xor4, %g0
+  %xor6 = xor i64 %xor5, %h0
+  %xor7 = xor i64 %xor6, %i0
+  %xor8 = xor i64 %xor7, %j0
+  %xor9 = xor i64 %xor8, %k0
+  %xor10 = xor i64 %xor9, %l0
+  %xor11 = xor i64 %xor10, %m0
+  %xor12 = xor i64 %xor11, %n0
+  %xor13 = xor i64 %xor12, %o0
+  %xor14 = xor i64 %xor13, %p0
+  %xor15 = xor i64 %xor14, %q0
+  %xor16 = xor i64 %xor15, 1
+  %xor17 = xor i64 %xor16, 1
+  ret i64 %xor17
+}
+
+; Calling each (non-)inlining function twice to make sure they won't get the sole call inlining cost bonus.
+define i64 @Caller(ptr noundef %in) {
+entry:
+  %arrayidx = getelementptr inbounds i64, ptr %in, i64 0
+  %a0 = load i64, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i64, ptr %in, i64 1
+  %b0 = load i64, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i64, ptr %in, i64 2
+  %c0 = load i64, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i64, ptr %in, i64 3
+  %d0 = load i64, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i64, ptr %in, i64 4
+  %e0 = load i64, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i64, ptr %in, i64 5
+  %f0 = load i64, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i64, ptr %in, i64 6
+  %g0 = load i64, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i64, ptr %in, i64 7
+  %h0 = load i64, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i64, ptr %in, i64 8
+  %i0 = load i64, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i64, ptr %in, i64 9
+  %j0 = load i64, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i64, ptr %in, i64 10
+  %k0 = load i64, ptr %arrayidx10, align 4
+  %arrayidx11 = getelementptr inbounds i64, ptr %in, i64 11
+  %l0 = load i64, ptr %arrayidx11, align 4
+  %arrayidx12 = getelementptr inbounds i64, ptr %in, i64 12
+  %m0 = load i64, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i64, ptr %in, i64 13
+  %n0 = load i64, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i64, ptr %in, i64 14
+  %o0 = load i64, ptr %arrayidx14, align 4
+  %arrayidx15 = getelementptr inbounds i64, ptr %in, i64 15
+  %p0 = load i64, ptr %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i64, ptr %in, i64 16
+  %q0 = load i64, ptr %arrayidx16, align 4
+  %noinlinecall1 = call noundef i64 @non_inlining_call(i64 noundef %a0, i64 noundef %b0, i64 noundef %c0, i64 noundef %d0, i64 noundef %e0, i64 noundef %f0, i64 noundef %g0, i64 noundef %h0, i64 noundef %i0, i64 noundef %j0, i64 noundef %k0, i64 noundef %l0, i64 noundef %m0, i64 noundef %n0, i64 noundef %o0, i64 noundef %p0)
+  %add = add i64 0, %noinlinecall1
+  %noinlinecall2 = call noundef i64 @non_inlining_call(i64 noundef %a0, i64 noundef %b0, i64 noundef %c0, i64 noundef %d0, i64 noundef %e0, i64 noundef %f0, i64 noundef %g0, i64 noundef %h0, i64 noundef %i0, i64 noundef %j0, i64 noundef %k0, i64 noundef %l0, i64 noundef %m0, i64 noundef %n0, i64 noundef %o0, i64 noundef %p0)
+  %add2 = add i64 %add, %noinlinecall2
+  %inlinecall1 = call noundef i64 @inlining_call(i64 noundef %a0, i64 noundef %b0, i64 noundef %c0, i64 noundef %d0, i64 noundef %e0, i64 noundef %f0, i64 noundef %g0, i64 noundef %h0, i64 noundef %i0, i64 noundef %j0, i64 noundef %k0, i64 noundef %l0, i64 noundef %m0, i64 noundef %n0, i64 noundef %o0, i64 noundef %p0, i64 noundef %q0)
+  %add3 = add i64 %add2, %inlinecall1
+  %inlinecall2 = call noundef i64 @inlining_call(i64 noundef %a0, i64 noundef %b0, i64 noundef %c0, i64 noundef %d0, i64 noundef %e0, i64 noundef %f0, i64 noundef %g0, i64 noundef %h0, i64 noundef %i0, i64 noundef %j0, i64 noundef %k0, i64 noundef %l0, i64 noundef %m0, i64 noundef %n0, i64 noundef %o0, i64 noundef %p0, i64 noundef %q0)
+  %add4 = add i64 %add3, %inlinecall2
+  ret i64 %add4
+}
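
A hedged reading of why this test splits at 16 vs. 17 arguments, using the same assumed costs as the sketch above: each i64 argument is lowered to two 32-bit registers, and none of these arguments qualify for SGPRs, so @non_inlining_call occupies exactly the 32-VGPR budget (max(0, 32 - 32) = 0, no bonus against the zero inline threshold), while @inlining_call occupies 34 VGPRs and earns max(0, 34 - 32) * 3 * 5 = 30 of extra threshold, which flips the inliner's decision.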
Lines changed: 164 additions & 0 deletions
@@ -0,0 +1,164 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-cost-full=true -inline-threshold=0 -inline-instr-cost=5 -inline-call-penalty=0 -debug-only=inline < %s 2>&1 | FileCheck %s
+
+; CHECK: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %noinlinecall1 = call noundef i32 @non_inlining_call
+; CHECK: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %noinlinecall2 = call noundef i32 @non_inlining_call
+; CHECK-NOT: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %inlinecall1 = call noundef i32 @inlining_call
+; CHECK-NOT: NOT Inlining (cost={{[0-9]+}}, threshold={{[0-9]+}}), Call: %inlinecall2 = call noundef i32 @inlining_call
+
+define noundef i32 @non_inlining_call(i32 noundef %a0, i32 noundef %b0, i32 noundef %c0, i32 noundef %d0, i32 noundef %e0, i32 noundef %f0, i32 noundef %g0, i32 noundef %h0, i32 noundef %i0, i32 noundef %j0, i32 noundef %k0, i32 noundef %l0, i32 noundef %m0, i32 noundef %n0, i32 noundef %o0, i32 noundef %p0, i32 noundef %q0, i32 noundef %r0, i32 noundef %s0, i32 noundef %t0, i32 noundef %u0, i32 noundef %v0, i32 noundef %w0, i32 noundef %x0, i32 noundef %y0, i32 noundef %z0, i32 noundef %a1, i32 noundef %b1, i32 noundef %c1, i32 noundef %d1, i32 noundef %e1, i32 noundef %f1) {
+entry:
+  %xor = xor i32 %a0, %b0
+  %xor1 = xor i32 %xor, %c0
+  %xor2 = xor i32 %xor1, %d0
+  %xor3 = xor i32 %xor2, %e0
+  %xor4 = xor i32 %xor3, %f0
+  %xor5 = xor i32 %xor4, %g0
+  %xor6 = xor i32 %xor5, %h0
+  %xor7 = xor i32 %xor6, %i0
+  %xor8 = xor i32 %xor7, %j0
+  %xor9 = xor i32 %xor8, %k0
+  %xor10 = xor i32 %xor9, %l0
+  %xor11 = xor i32 %xor10, %m0
+  %xor12 = xor i32 %xor11, %n0
+  %xor13 = xor i32 %xor12, %o0
+  %xor14 = xor i32 %xor13, %p0
+  %xor15 = xor i32 %xor14, %q0
+  %xor16 = xor i32 %xor15, %r0
+  %xor17 = xor i32 %xor16, %s0
+  %xor18 = xor i32 %xor17, %t0
+  %xor19 = xor i32 %xor18, %u0
+  %xor20 = xor i32 %xor19, %v0
+  %xor21 = xor i32 %xor20, %w0
+  %xor22 = xor i32 %xor21, %x0
+  %xor23 = xor i32 %xor22, %y0
+  %xor24 = xor i32 %xor23, %z0
+  %xor25 = xor i32 %xor24, %a1
+  %xor26 = xor i32 %xor25, %b1
+  %xor27 = xor i32 %xor26, %c1
+  %xor28 = xor i32 %xor27, %d1
+  %xor29 = xor i32 %xor28, %e1
+  %xor30 = xor i32 %xor29, %f1
+  %xor31 = xor i32 %xor30, 1
+  %xor32 = xor i32 %xor31, 2
+  ret i32 %xor32
+}
+
+define noundef i32 @inlining_call(i32 noundef %a0, i32 noundef %b0, i32 noundef %c0, i32 noundef %d0, i32 noundef %e0, i32 noundef %f0, i32 noundef %g0, i32 noundef %h0, i32 noundef %i0, i32 noundef %j0, i32 noundef %k0, i32 noundef %l0, i32 noundef %m0, i32 noundef %n0, i32 noundef %o0, i32 noundef %p0, i32 noundef %q0, i32 noundef %r0, i32 noundef %s0, i32 noundef %t0, i32 noundef %u0, i32 noundef %v0, i32 noundef %w0, i32 noundef %x0, i32 noundef %y0, i32 noundef %z0, i32 noundef %a1, i32 noundef %b1, i32 noundef %c1, i32 noundef %d1, i32 noundef %e1, i32 noundef %f1, i32 noundef %g1) {
+entry:
+  %xor = xor i32 %a0, %b0
+  %xor1 = xor i32 %xor, %c0
+  %xor2 = xor i32 %xor1, %d0
+  %xor3 = xor i32 %xor2, %e0
+  %xor4 = xor i32 %xor3, %f0
+  %xor5 = xor i32 %xor4, %g0
+  %xor6 = xor i32 %xor5, %h0
+  %xor7 = xor i32 %xor6, %i0
+  %xor8 = xor i32 %xor7, %j0
+  %xor9 = xor i32 %xor8, %k0
+  %xor10 = xor i32 %xor9, %l0
+  %xor11 = xor i32 %xor10, %m0
+  %xor12 = xor i32 %xor11, %n0
+  %xor13 = xor i32 %xor12, %o0
+  %xor14 = xor i32 %xor13, %p0
+  %xor15 = xor i32 %xor14, %q0
+  %xor16 = xor i32 %xor15, %r0
+  %xor17 = xor i32 %xor16, %s0
+  %xor18 = xor i32 %xor17, %t0
+  %xor19 = xor i32 %xor18, %u0
+  %xor20 = xor i32 %xor19, %v0
+  %xor21 = xor i32 %xor20, %w0
+  %xor22 = xor i32 %xor21, %x0
+  %xor23 = xor i32 %xor22, %y0
+  %xor24 = xor i32 %xor23, %z0
+  %xor25 = xor i32 %xor24, %a1
+  %xor26 = xor i32 %xor25, %b1
+  %xor27 = xor i32 %xor26, %c1
+  %xor28 = xor i32 %xor27, %d1
+  %xor29 = xor i32 %xor28, %e1
+  %xor30 = xor i32 %xor29, %f1
+  %xor31 = xor i32 %xor30, %g1
+  %xor32 = xor i32 %xor31, 1
+  %xor33 = xor i32 %xor32, 2
+  ret i32 %xor33
+}
+
+; Calling each (non-)inlining function twice to make sure they won't get the sole call inlining cost bonus.
+define i32 @Caller(ptr noundef %in) {
+entry:
+  %arrayidx = getelementptr inbounds i32, ptr %in, i64 0
+  %a0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %in, i64 1
+  %b0 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %in, i64 2
+  %c0 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %in, i64 3
+  %d0 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %in, i64 4
+  %e0 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %in, i64 5
+  %f0 = load i32, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %in, i64 6
+  %g0 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %in, i64 7
+  %h0 = load i32, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %in, i64 8
+  %i0 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %in, i64 9
+  %j0 = load i32, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %in, i64 10
+  %k0 = load i32, ptr %arrayidx10, align 4
+  %arrayidx11 = getelementptr inbounds i32, ptr %in, i64 11
+  %l0 = load i32, ptr %arrayidx11, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %in, i64 12
+  %m0 = load i32, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %in, i64 13
+  %n0 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %in, i64 14
+  %o0 = load i32, ptr %arrayidx14, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %in, i64 15
+  %p0 = load i32, ptr %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %in, i64 16
+  %q0 = load i32, ptr %arrayidx16, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %in, i64 17
+  %r0 = load i32, ptr %arrayidx17, align 4
+  %arrayidx18 = getelementptr inbounds i32, ptr %in, i64 18
+  %s0 = load i32, ptr %arrayidx18, align 4
+  %arrayidx19 = getelementptr inbounds i32, ptr %in, i64 19
+  %t0 = load i32, ptr %arrayidx19, align 4
+  %arrayidx20 = getelementptr inbounds i32, ptr %in, i64 20
+  %u0 = load i32, ptr %arrayidx20, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %in, i64 21
+  %v0 = load i32, ptr %arrayidx21, align 4
+  %arrayidx22 = getelementptr inbounds i32, ptr %in, i64 22
+  %w0 = load i32, ptr %arrayidx22, align 4
+  %arrayidx23 = getelementptr inbounds i32, ptr %in, i64 23
+  %x0 = load i32, ptr %arrayidx23, align 4
+  %arrayidx24 = getelementptr inbounds i32, ptr %in, i64 24
+  %y0 = load i32, ptr %arrayidx24, align 4
+  %arrayidx25 = getelementptr inbounds i32, ptr %in, i64 25
+  %z0 = load i32, ptr %arrayidx25, align 4
+  %arrayidx26 = getelementptr inbounds i32, ptr %in, i64 26
+  %a1 = load i32, ptr %arrayidx26, align 4
+  %arrayidx27 = getelementptr inbounds i32, ptr %in, i64 27
+  %b1 = load i32, ptr %arrayidx27, align 4
+  %arrayidx28 = getelementptr inbounds i32, ptr %in, i64 28
+  %c1 = load i32, ptr %arrayidx28, align 4
+  %arrayidx29 = getelementptr inbounds i32, ptr %in, i64 29
+  %d1 = load i32, ptr %arrayidx29, align 4
+  %arrayidx30 = getelementptr inbounds i32, ptr %in, i64 30
+  %e1 = load i32, ptr %arrayidx30, align 4
+  %arrayidx31 = getelementptr inbounds i32, ptr %in, i64 31
+  %f1 = load i32, ptr %arrayidx31, align 4
+  %arrayidx32 = getelementptr inbounds i32, ptr %in, i64 32
+  %g1 = load i32, ptr %arrayidx32, align 4
+  %noinlinecall1 = call noundef i32 @non_inlining_call(i32 noundef %a0, i32 noundef %b0, i32 noundef %c0, i32 noundef %d0, i32 noundef %e0, i32 noundef %f0, i32 noundef %g0, i32 noundef %h0, i32 noundef %i0, i32 noundef %j0, i32 noundef %k0, i32 noundef %l0, i32 noundef %m0, i32 noundef %n0, i32 noundef %o0, i32 noundef %p0, i32 noundef %q0, i32 noundef %r0, i32 noundef %s0, i32 noundef %t0, i32 noundef %u0, i32 noundef %v0, i32 noundef %w0, i32 noundef %x0, i32 noundef %y0, i32 noundef %z0, i32 noundef %a1, i32 noundef %b1, i32 noundef %c1, i32 noundef %d1, i32 noundef %e1, i32 noundef %f1)
+  %add = add i32 0, %noinlinecall1
+  %noinlinecall2 = call noundef i32 @non_inlining_call(i32 noundef %a0, i32 noundef %b0, i32 noundef %c0, i32 noundef %d0, i32 noundef %e0, i32 noundef %f0, i32 noundef %g0, i32 noundef %h0, i32 noundef %i0, i32 noundef %j0, i32 noundef %k0, i32 noundef %l0, i32 noundef %m0, i32 noundef %n0, i32 noundef %o0, i32 noundef %p0, i32 noundef %q0, i32 noundef %r0, i32 noundef %s0, i32 noundef %t0, i32 noundef %u0, i32 noundef %v0, i32 noundef %w0, i32 noundef %x0, i32 noundef %y0, i32 noundef %z0, i32 noundef %a1, i32 noundef %b1, i32 noundef %c1, i32 noundef %d1, i32 noundef %e1, i32 noundef %f1)
+  %add2 = add i32 %add, %noinlinecall2
+  %inlinecall1 = call noundef i32 @inlining_call(i32 noundef %a0, i32 noundef %b0, i32 noundef %c0, i32 noundef %d0, i32 noundef %e0, i32 noundef %f0, i32 noundef %g0, i32 noundef %h0, i32 noundef %i0, i32 noundef %j0, i32 noundef %k0, i32 noundef %l0, i32 noundef %m0, i32 noundef %n0, i32 noundef %o0, i32 noundef %p0, i32 noundef %q0, i32 noundef %r0, i32 noundef %s0, i32 noundef %t0, i32 noundef %u0, i32 noundef %v0, i32 noundef %w0, i32 noundef %x0, i32 noundef %y0, i32 noundef %z0, i32 noundef %a1, i32 noundef %b1, i32 noundef %c1, i32 noundef %d1, i32 noundef %e1, i32 noundef %f1, i32 noundef %g1)
+  %add3 = add i32 %add2, %inlinecall1
+  %inlinecall2 = call noundef i32 @inlining_call(i32 noundef %a0, i32 noundef %b0, i32 noundef %c0, i32 noundef %d0, i32 noundef %e0, i32 noundef %f0, i32 noundef %g0, i32 noundef %h0, i32 noundef %i0, i32 noundef %j0, i32 noundef %k0, i32 noundef %l0, i32 noundef %m0, i32 noundef %n0, i32 noundef %o0, i32 noundef %p0, i32 noundef %q0, i32 noundef %r0, i32 noundef %s0, i32 noundef %t0, i32 noundef %u0, i32 noundef %v0, i32 noundef %w0, i32 noundef %x0, i32 noundef %y0, i32 noundef %z0, i32 noundef %a1, i32 noundef %b1, i32 noundef %c1, i32 noundef %d1, i32 noundef %e1, i32 noundef %f1, i32 noundef %g1)
+  %add4 = add i32 %add3, %inlinecall2
+  ret i32 %add4
+}
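
By the same hedged reading as above, the i32 variant probes the boundary one register at a time: 32 i32 arguments fill the 32-VGPR budget exactly, so @non_inlining_call gets no bonus, while the 33rd argument of @inlining_call pushes usage one register over and earns max(0, 33 - 32) * 3 * 5 = 15 of extra threshold.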
