Skip to content

Commit 59fb9cd

Browse files
committed
Devirtualize a call on alloca without waiting for post inline cleanup and next
DevirtSCCRepeatedPass iteration. Needs Review. Public. This aims to fix a missed inlining case. If there is a virtual call in the callee on an alloca (a stack-allocated object) in the caller, and the callee is inlined into the caller, the post-inline cleanup would devirtualize the virtual call. However, under the new pass manager, whether DevirtSCCRepeatedPass runs another iteration is decided by a heuristic; if that next iteration does not happen, we may miss inlining the devirtualized call. This enables inlining in clang/test/CodeGenCXX/member-function-pointer-calls.cpp.
1 parent 1e9321e commit 59fb9cd

File tree

3 files changed

+230
-8
lines changed

3 files changed

+230
-8
lines changed

clang/test/CodeGenCXX/member-function-pointer-calls.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,8 @@ int f(A* a, int (A::*fp)()) {
1111
}
1212

1313
// CHECK-LABEL: define i32 @_Z2g1v()
14-
// CHECK-LEGACY: ret i32 1
15-
// CHECK-NEWPM: [[A:%.*]] = alloca %struct.A, align 8
16-
// CHECK-NEWPM: [[TMP:%.*]] = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
17-
// CHECK-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** [[TMP]], align 8
18-
// CHECK-NEWPM: [[RET:%.*]] = call i32 @_ZN1A3vf1Ev(%struct.A* nonnull %a) #2
19-
// CHECK-NEWPM: ret i32 [[RET]]
14+
// CHECK-NOT: }
15+
// CHECK: ret i32 1
2016
// MINGW64-LABEL: define dso_local i32 @_Z2g1v()
2117
// MINGW64: call i32 @_Z1fP1AMS_FivE(%struct.A* %{{.*}}, { i64, i64 }* %{{.*}})
2218
int g1() {
@@ -25,6 +21,7 @@ int g1() {
2521
}
2622

2723
// CHECK-LABEL: define i32 @_Z2g2v()
24+
// CHECK-NOT: }
2825
// CHECK: ret i32 2
2926
// MINGW64-LABEL: define dso_local i32 @_Z2g2v()
3027
// MINGW64: call i32 @_Z1fP1AMS_FivE(%struct.A* %{{.*}}, { i64, i64 }* %{{.*}})

llvm/lib/Transforms/IPO/Inliner.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/Analysis/TargetLibraryInfo.h"
3636
#include "llvm/Analysis/TargetTransformInfo.h"
3737
#include "llvm/Transforms/Utils/Local.h"
38+
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
3839
#include "llvm/IR/Attributes.h"
3940
#include "llvm/IR/BasicBlock.h"
4041
#include "llvm/IR/CallSite.h"
@@ -1100,10 +1101,20 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
11001101
if (!IFI.InlinedCallSites.empty()) {
11011102
int NewHistoryID = InlineHistory.size();
11021103
InlineHistory.push_back({&Callee, InlineHistoryID});
1103-
for (CallSite &CS : reverse(IFI.InlinedCallSites))
1104-
if (Function *NewCallee = CS.getCalledFunction())
1104+
for (CallSite &CS : reverse(IFI.InlinedCallSites)) {
1105+
Function *NewCallee = CS.getCalledFunction();
1106+
if (!NewCallee) {
1107+
// Try to promote an indirect (virtual) call without waiting for the
1108+
// post-inline cleanup and the next DevirtSCCRepeatedPass iteration
1109+
// because the next iteration may not happen and we may miss
1110+
// inlining it.
1111+
if (tryPromoteCall(CS))
1112+
NewCallee = CS.getCalledFunction();
1113+
}
1114+
if (NewCallee)
11051115
if (!NewCallee->isDeclaration())
11061116
Calls.push_back({CS, NewHistoryID});
1117+
}
11071118
}
11081119

11091120
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
; RUN: opt < %s -passes='cgscc(devirt<4>(inline)),function(sroa,early-cse)' -S | FileCheck %s
2+
; RUN: opt < %s -passes='default<O3>' -S | FileCheck %s
3+
4+
; Check that DoNotOptimize is inlined into Test.
5+
; CHECK: @_Z4Testv()
6+
; CHECK-NOT: ret void
7+
; CHECK: call void asm
8+
; CHECK: ret void
9+
10+
;template <class T>
11+
;void DoNotOptimize(const T& var) {
12+
; asm volatile("" : "+m"(const_cast<T&>(var)));
13+
;}
14+
;
15+
;class Interface {
16+
; public:
17+
; virtual void Run() = 0;
18+
;};
19+
;
20+
;class Impl : public Interface {
21+
; public:
22+
; Impl() : f(3) {}
23+
; void Run() { DoNotOptimize(this); }
24+
;
25+
; private:
26+
; int f;
27+
;};
28+
;
29+
;static void IndirectRun(Interface& o) { o.Run(); }
30+
;
31+
;void Test() {
32+
; Impl o;
33+
; IndirectRun(o);
34+
;}
35+
36+
%class.Impl = type <{ %class.Interface, i32, [4 x i8] }>
37+
%class.Interface = type { i32 (...)** }
38+
39+
@_ZTV4Impl = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI4Impl to i8*), i8* bitcast (void (%class.Impl*)* @_ZN4Impl3RunEv to i8*)] }, align 8
40+
@_ZTVN10__cxxabiv120__si_class_type_infoE = external dso_local global i8*
41+
@_ZTS4Impl = linkonce_odr dso_local constant [6 x i8] c"4Impl\00", align 1
42+
@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8*
43+
@_ZTS9Interface = linkonce_odr dso_local constant [11 x i8] c"9Interface\00", align 1
44+
@_ZTI9Interface = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9Interface, i32 0, i32 0) }, align 8
45+
@_ZTI4Impl = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @_ZTS4Impl, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI9Interface to i8*) }, align 8
46+
@_ZTV9Interface = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI9Interface to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)] }, align 8
47+
48+
define dso_local void @_Z4Testv() local_unnamed_addr {
49+
entry:
50+
%o = alloca %class.Impl, align 8
51+
%0 = bitcast %class.Impl* %o to i8*
52+
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
53+
call void @_ZN4ImplC2Ev(%class.Impl* nonnull %o)
54+
%1 = getelementptr inbounds %class.Impl, %class.Impl* %o, i64 0, i32 0
55+
call fastcc void @_ZL11IndirectRunR9Interface(%class.Interface* nonnull dereferenceable(8) %1)
56+
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
57+
ret void
58+
}
59+
60+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
61+
62+
define linkonce_odr dso_local void @_ZN4ImplC2Ev(%class.Impl* %this) unnamed_addr align 2 {
63+
entry:
64+
%0 = getelementptr %class.Impl, %class.Impl* %this, i64 0, i32 0
65+
call void @_ZN9InterfaceC2Ev(%class.Interface* %0)
66+
%1 = getelementptr %class.Impl, %class.Impl* %this, i64 0, i32 0, i32 0
67+
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV4Impl, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8
68+
%f = getelementptr inbounds %class.Impl, %class.Impl* %this, i64 0, i32 1
69+
store i32 3, i32* %f, align 8
70+
ret void
71+
}
72+
73+
define internal fastcc void @_ZL11IndirectRunR9Interface(%class.Interface* dereferenceable(8) %o) unnamed_addr {
74+
entry:
75+
%0 = bitcast %class.Interface* %o to void (%class.Interface*)***
76+
%vtable = load void (%class.Interface*)**, void (%class.Interface*)*** %0, align 8
77+
%1 = load void (%class.Interface*)*, void (%class.Interface*)** %vtable, align 8
78+
call void %1(%class.Interface* nonnull %o)
79+
ret void
80+
}
81+
82+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
83+
84+
define linkonce_odr dso_local void @_ZN9InterfaceC2Ev(%class.Interface* %this) unnamed_addr align 2 {
85+
entry:
86+
%0 = getelementptr %class.Interface, %class.Interface* %this, i64 0, i32 0
87+
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV9Interface, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
88+
ret void
89+
}
90+
91+
define linkonce_odr dso_local void @_ZN4Impl3RunEv(%class.Impl* %this) unnamed_addr align 2 {
92+
entry:
93+
%ref.tmp = alloca %class.Impl*, align 8
94+
%0 = bitcast %class.Impl** %ref.tmp to i8*
95+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
96+
store %class.Impl* %this, %class.Impl** %ref.tmp, align 8
97+
call void @_Z13DoNotOptimizeIP4ImplEvRKT_(%class.Impl** nonnull dereferenceable(8) %ref.tmp)
98+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
99+
ret void
100+
}
101+
102+
declare dso_local void @__cxa_pure_virtual() unnamed_addr
103+
104+
define linkonce_odr dso_local void @_Z13DoNotOptimizeIP4ImplEvRKT_(%class.Impl** dereferenceable(8) %var) local_unnamed_addr {
105+
entry:
106+
call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(%class.Impl** nonnull %var, %class.Impl** nonnull %var)
107+
ret void
108+
}
109+
110+
111+
; Based on clang/test/CodeGenCXX/member-function-pointer-calls.cpp.
112+
; Check that vf1 and vf2 are inlined into g1 and g2.
113+
; CHECK: @_Z2g1v()
114+
; CHECK-NOT: }
115+
; CHECK: ret i32 1
116+
; CHECK: @_Z2g2v()
117+
; CHECK-NOT: }
118+
; CHECK: ret i32 2
119+
;
120+
;struct A {
121+
; virtual int vf1() { return 1; }
122+
; virtual int vf2() { return 2; }
123+
;};
124+
;
125+
;int f(A* a, int (A::*fp)()) {
126+
; return (a->*fp)();
127+
;}
128+
;int g1() {
129+
; A a;
130+
; return f(&a, &A::vf1);
131+
;}
132+
;int g2() {
133+
; A a;
134+
; return f(&a, &A::vf2);
135+
;}
136+
137+
%struct.A = type { i32 (...)** }
138+
139+
@_ZTV1A = linkonce_odr unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3vf1Ev to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3vf2Ev to i8*)] }, align 8
140+
@_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00", align 1
141+
@_ZTI1A = linkonce_odr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }, align 8
142+
143+
define i32 @_Z1fP1AMS_FivE(%struct.A* %a, i64 %fp.coerce0, i64 %fp.coerce1) {
144+
entry:
145+
%0 = bitcast %struct.A* %a to i8*
146+
%1 = getelementptr inbounds i8, i8* %0, i64 %fp.coerce1
147+
%this.adjusted = bitcast i8* %1 to %struct.A*
148+
%2 = and i64 %fp.coerce0, 1
149+
%memptr.isvirtual = icmp eq i64 %2, 0
150+
br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual
151+
152+
memptr.virtual: ; preds = %entry
153+
%3 = bitcast i8* %1 to i8**
154+
%vtable = load i8*, i8** %3, align 8
155+
%4 = add i64 %fp.coerce0, -1
156+
%5 = getelementptr i8, i8* %vtable, i64 %4
157+
%6 = bitcast i8* %5 to i32 (%struct.A*)**
158+
%memptr.virtualfn = load i32 (%struct.A*)*, i32 (%struct.A*)** %6, align 8
159+
br label %memptr.end
160+
161+
memptr.nonvirtual: ; preds = %entry
162+
%memptr.nonvirtualfn = inttoptr i64 %fp.coerce0 to i32 (%struct.A*)*
163+
br label %memptr.end
164+
165+
memptr.end: ; preds = %memptr.nonvirtual, %memptr.virtual
166+
%7 = phi i32 (%struct.A*)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ]
167+
%call = call i32 %7(%struct.A* %this.adjusted)
168+
ret i32 %call
169+
}
170+
171+
define i32 @_Z2g1v() {
172+
entry:
173+
%a = alloca %struct.A, align 8
174+
%0 = bitcast %struct.A* %a to i8*
175+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
176+
call void @_ZN1AC1Ev(%struct.A* nonnull %a)
177+
%call = call i32 @_Z1fP1AMS_FivE(%struct.A* nonnull %a, i64 1, i64 0)
178+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
179+
ret i32 %call
180+
}
181+
182+
define linkonce_odr void @_ZN1AC1Ev(%struct.A* %this) align 2 {
183+
entry:
184+
call void @_ZN1AC2Ev(%struct.A* %this)
185+
ret void
186+
}
187+
188+
define i32 @_Z2g2v() {
189+
entry:
190+
%a = alloca %struct.A, align 8
191+
%0 = bitcast %struct.A* %a to i8*
192+
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
193+
call void @_ZN1AC1Ev(%struct.A* nonnull %a)
194+
%call = call i32 @_Z1fP1AMS_FivE(%struct.A* nonnull %a, i64 9, i64 0)
195+
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
196+
ret i32 %call
197+
}
198+
199+
define linkonce_odr void @_ZN1AC2Ev(%struct.A* %this) align 2 {
200+
entry:
201+
%0 = getelementptr %struct.A, %struct.A* %this, i64 0, i32 0
202+
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
203+
ret void
204+
}
205+
206+
define linkonce_odr i32 @_ZN1A3vf1Ev(%struct.A* %this) align 2 {
207+
entry:
208+
ret i32 1
209+
}
210+
211+
define linkonce_odr i32 @_ZN1A3vf2Ev(%struct.A* %this) align 2 {
212+
entry:
213+
ret i32 2
214+
}

0 commit comments

Comments
 (0)