Skip to content

Commit 28518d9

Browse files
committed
[InlineFunction] Handle return attributes on call within inlined body
Consider a callee function that has a call (C) within it which feeds into the return. When we inline that callee into a callsite that has return attributes, we can backward propagate those attributes to the call (C) within the inlined callee body. This is safe only if we can guarantee transfer of execution to the successor for every instruction in the window between the return value (i.e. the call C) and the return instruction. See added test cases. Reviewed-By: reames, jdoerfert Differential Revision: https://reviews.llvm.org/D76140
1 parent 128c0d0 commit 28518d9

File tree

6 files changed

+266
-18
lines changed

6 files changed

+266
-18
lines changed

clang/test/CodeGen/builtins-systemz-zvector.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3665,31 +3665,31 @@ void test_integer(void) {
36653665
// CHECK-ASM: vsumqg
36663666

36673667
idx = vec_test_mask(vsc, vuc);
3668-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3668+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36693669
// CHECK-ASM: vtm
36703670
idx = vec_test_mask(vuc, vuc);
3671-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3671+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36723672
// CHECK-ASM: vtm
36733673
idx = vec_test_mask(vss, vus);
3674-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3674+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36753675
// CHECK-ASM: vtm
36763676
idx = vec_test_mask(vus, vus);
3677-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3677+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36783678
// CHECK-ASM: vtm
36793679
idx = vec_test_mask(vsi, vui);
3680-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3680+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36813681
// CHECK-ASM: vtm
36823682
idx = vec_test_mask(vui, vui);
3683-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3683+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36843684
// CHECK-ASM: vtm
36853685
idx = vec_test_mask(vsl, vul);
3686-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3686+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36873687
// CHECK-ASM: vtm
36883688
idx = vec_test_mask(vul, vul);
3689-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3689+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36903690
// CHECK-ASM: vtm
36913691
idx = vec_test_mask(vd, vul);
3692-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
3692+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
36933693
// CHECK-ASM: vtm
36943694
}
36953695

clang/test/CodeGen/builtins-systemz-zvector2.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -654,10 +654,10 @@ void test_integer(void) {
654654
// CHECK-ASM: vsrlb
655655

656656
idx = vec_test_mask(vf, vui);
657-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
657+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
658658
// CHECK-ASM: vtm
659659
idx = vec_test_mask(vd, vul);
660-
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
660+
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
661661
// CHECK-ASM: vtm
662662

663663
vuc = vec_msum_u128(vul, vul, vuc, 0);

clang/test/CodeGen/movbe-builtins.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
short test_loadbe_i16(const short *P) {
88
// CHECK-LABEL: @test_loadbe_i16
99
// CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
10-
// CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
10+
// CHECK: call signext i16 @llvm.bswap.i16(i16 [[LOAD]])
1111
return _loadbe_i16(P);
1212
}
1313

clang/test/CodeGen/rot-intrinsics.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
2-
// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
3-
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
4-
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
5-
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
6-
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
1+
// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
2+
// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
3+
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
4+
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
5+
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
6+
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
77

88
#include <x86intrin.h>
99

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,21 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
8080
cl::Hidden,
8181
cl::desc("Convert noalias attributes to metadata during inlining."));
8282

83+
static cl::opt<bool> UpdateReturnAttributes(
84+
"update-return-attrs", cl::init(true), cl::Hidden,
85+
cl::desc("Update return attributes on calls within inlined body"));
86+
8387
static cl::opt<bool>
8488
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
8589
cl::init(true), cl::Hidden,
8690
cl::desc("Convert align attributes to assumptions during inlining."));
8791

92+
static cl::opt<unsigned> InlinerAttributeWindow(
93+
"inliner-attribute-window", cl::Hidden,
94+
cl::desc("the maximum number of instructions analyzed for may throw during "
95+
"attribute inference in inlined body"),
96+
cl::init(4));
97+
8898
llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
8999
AAResults *CalleeAAR,
90100
bool InsertLifetime) {
@@ -1136,6 +1146,81 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
11361146
}
11371147
}
11381148

1149+
static bool MayContainThrowingOrExitingCall(Instruction *Begin,
1150+
Instruction *End) {
1151+
1152+
assert(Begin->getParent() == End->getParent() &&
1153+
"Expected to be in same basic block!");
1154+
unsigned NumInstChecked = 0;
1155+
// Check that all instructions in the range [Begin, End) are guaranteed to
1156+
// transfer execution to successor.
1157+
for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
1158+
if (NumInstChecked++ > InlinerAttributeWindow ||
1159+
!isGuaranteedToTransferExecutionToSuccessor(&I))
1160+
return true;
1161+
return false;
1162+
}
1163+
1164+
static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) {
1165+
if (!UpdateReturnAttributes)
1166+
return;
1167+
AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex);
1168+
if (AB.empty())
1169+
return;
1170+
1171+
auto *CalledFunction = CS.getCalledFunction();
1172+
auto &Context = CalledFunction->getContext();
1173+
1174+
for (auto &BB : *CalledFunction) {
1175+
auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
1176+
if (!RI || !isa<CallBase>(RI->getOperand(0)))
1177+
continue;
1178+
// Sanity check that the cloned return instruction exists and is a return
1179+
// instruction itself.
1180+
auto *NewRI = dyn_cast_or_null<ReturnInst>(VMap.lookup(RI));
1181+
if (!NewRI)
1182+
continue;
1183+
auto *RetVal = cast<CallBase>(RI->getOperand(0));
1184+
// Sanity check that the cloned RetVal exists and is a call.
1185+
// Simplification during inlining could have transformed the cloned
1186+
// instruction.
1187+
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
1188+
if (!NewRetVal)
1189+
continue;
1190+
// Backward propagation of attributes to the returned value may be incorrect
1191+
// if it is control flow dependent.
1192+
// Consider:
1193+
// @callee {
1194+
// %rv = call @foo()
1195+
// %rv2 = call @bar()
1196+
// if (%rv2 != null)
1197+
// return %rv2
1198+
// if (%rv == null)
1199+
// exit()
1200+
// return %rv
1201+
// }
1202+
// caller() {
1203+
// %val = call nonnull @callee()
1204+
// }
1205+
// Here we cannot add the nonnull attribute on either foo or bar. So, we
1206+
// limit the check to both NewRetVal and NewRI are in the same basic block
1207+
// and there are no throwing/exiting instructions between these
1208+
// instructions.
1209+
if (NewRI->getParent() != NewRetVal->getParent() ||
1210+
MayContainThrowingOrExitingCall(NewRetVal, NewRI))
1211+
continue;
1212+
// Add to the existing attributes of NewRetVal.
1213+
// NB! When we have the same attribute already existing on NewRetVal, but
1214+
// with a differing value, the AttributeList's merge API honours the already
1215+
// existing attribute value (i.e. attributes such as dereferenceable,
1216+
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1217+
AttributeList AL = NewRetVal->getAttributes();
1218+
AttributeList NewAL =
1219+
AL.addAttributes(Context, AttributeList::ReturnIndex, AB);
1220+
NewRetVal->setAttributes(NewAL);
1221+
}
1222+
}
1223+
11391224
/// If the inlined function has non-byval align arguments, then
11401225
/// add @llvm.assume-based alignment assumptions to preserve this information.
11411226
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
@@ -1801,6 +1886,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
18011886
// Add noalias metadata if necessary.
18021887
AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
18031888

1889+
// Clone return attributes on the callsite into the calls within the inlined
1890+
// function which feed into its return value.
1891+
AddReturnAttributes(CS, VMap);
1892+
18041893
// Propagate llvm.mem.parallel_loop_access if necessary.
18051894
PropagateParallelLoopAccessMetadata(CS, VMap);
18061895

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
2+
; RUN: opt < %s -passes=always-inline -S | FileCheck %s
3+
4+
declare i8* @foo(i8*) argmemonly nounwind
5+
6+
define i8* @callee(i8 *%p) alwaysinline {
7+
; CHECK: @callee(
8+
; CHECK: call i8* @foo(i8* noalias %p)
9+
%r = call i8* @foo(i8* noalias %p)
10+
ret i8* %r
11+
}
12+
13+
define i8* @caller(i8* %ptr, i64 %x) {
14+
; CHECK-LABEL: @caller
15+
; CHECK: call nonnull i8* @foo(i8* noalias
16+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
17+
%p = call nonnull i8* @callee(i8* %gep)
18+
ret i8* %p
19+
}
20+
21+
declare void @llvm.experimental.guard(i1,...)
22+
; Cannot add nonnull attribute to foo
23+
; because the guard is a throwing call
24+
define internal i8* @callee_with_throwable(i8* %p) alwaysinline {
25+
; CHECK-NOT: callee_with_throwable
26+
%r = call i8* @foo(i8* %p)
27+
%cond = icmp ne i8* %r, null
28+
call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
29+
ret i8* %r
30+
}
31+
32+
declare i8* @bar(i8*) readonly nounwind
33+
; Here also we cannot add nonnull attribute to the call bar.
34+
define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline {
35+
; CHECK-NOT: callee_with_explicit_control_flow
36+
%r = call i8* @bar(i8* %p)
37+
%cond = icmp ne i8* %r, null
38+
br i1 %cond, label %ret, label %orig
39+
40+
ret:
41+
ret i8* %r
42+
43+
orig:
44+
ret i8* %p
45+
}
46+
47+
define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) {
48+
; CHECK-LABEL: @caller2
49+
; CHECK: call i8* @foo
50+
; CHECK: call i8* @bar
51+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
52+
%p = call nonnull i8* @callee_with_throwable(i8* %gep)
53+
%q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep)
54+
br i1 %cond, label %pret, label %qret
55+
56+
pret:
57+
ret i8* %p
58+
59+
qret:
60+
ret i8* %q
61+
}
62+
63+
define internal i8* @callee3(i8 *%p) alwaysinline {
64+
; CHECK-NOT: callee3
65+
%r = call noalias i8* @foo(i8* %p)
66+
ret i8* %r
67+
}
68+
69+
; add the deref attribute to the existing attributes on foo.
70+
define i8* @caller3(i8* %ptr, i64 %x) {
71+
; CHECK-LABEL: caller3
72+
; CHECK: call noalias dereferenceable_or_null(12) i8* @foo
73+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
74+
%p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep)
75+
ret i8* %p
76+
}
77+
78+
declare i8* @inf_loop_call(i8*) nounwind
79+
; We cannot propagate attributes to foo because we do not know whether inf_loop_call
80+
; will return execution.
81+
define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline {
82+
; CHECK-NOT: callee_with_sideeffect_callsite
83+
%r = call i8* @foo(i8* %p)
84+
%v = call i8* @inf_loop_call(i8* %p)
85+
ret i8* %r
86+
}
87+
88+
; do not add deref attribute to foo
89+
define i8* @test4(i8* %ptr, i64 %x) {
90+
; CHECK-LABEL: test4
91+
; CHECK: call i8* @foo
92+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
93+
%p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep)
94+
ret i8* %p
95+
}
96+
97+
declare i8* @baz(i8*) nounwind readonly
98+
define internal i8* @callee5(i8* %p) alwaysinline {
99+
; CHECK-NOT: callee5
100+
%r = call i8* @foo(i8* %p)
101+
%v = call i8* @baz(i8* %p)
102+
ret i8* %r
103+
}
104+
105+
; add the deref attribute to foo.
106+
define i8* @test5(i8* %ptr, i64 %x) {
107+
; CHECK-LABEL: test5
108+
; CHECK: call dereferenceable_or_null(12) i8* @foo
109+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
110+
%s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep)
111+
ret i8* %s
112+
}
113+
114+
; deref attributes have different values on the callee and the call feeding into
115+
; the return.
116+
; AttrBuilder chooses the already existing value and does not overwrite it.
117+
define internal i8* @callee6(i8* %p) alwaysinline {
118+
; CHECK-NOT: callee6
119+
%r = call dereferenceable_or_null(16) i8* @foo(i8* %p)
120+
%v = call i8* @baz(i8* %p)
121+
ret i8* %r
122+
}
123+
124+
125+
define i8* @test6(i8* %ptr, i64 %x) {
126+
; CHECK-LABEL: test6
127+
; CHECK: call dereferenceable_or_null(16) i8* @foo
128+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
129+
%s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep)
130+
ret i8* %s
131+
}
132+
133+
; We add the return attributes from the call site of callee7 to both the calls below.
134+
define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline {
135+
; CHECK-NOT: @callee7(
136+
br i1 %cond, label %pass, label %fail
137+
138+
pass:
139+
%r = call i8* @foo(i8* noalias %ptr)
140+
ret i8* %r
141+
142+
fail:
143+
%s = call i8* @baz(i8* %ptr)
144+
ret i8* %s
145+
}
146+
147+
define void @test7(i8* %ptr, i64 %x, i1 %cond) {
148+
; CHECK-LABEL: @test7
149+
; CHECK: call nonnull i8* @foo(i8* noalias
150+
; CHECK: call nonnull i8* @baz
151+
; CHECK: phi i8*
152+
; CHECK: call void @snort
153+
154+
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
155+
%t = call nonnull i8* @callee7(i8* %gep, i1 %cond)
156+
call void @snort(i8* %t)
157+
ret void
158+
}
159+
declare void @snort(i8*)

0 commit comments

Comments
 (0)