Skip to content

Commit 10df988

Browse files
[ArgPromotion] Handle pointer arguments of recursive calls (#78735)
Argument promotion does not currently promote pointer arguments of functions that call themselves recursively. This patch adds support for self-recursive functions, i.e. functions whose SCC has size 1. Because of the complexity of ValueTracking across recursive calls when the SCC has more than one function, we bail out in those cases.
1 parent 51b22f9 commit 10df988

File tree

7 files changed

+525
-5
lines changed

7 files changed

+525
-5
lines changed

llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -423,9 +423,9 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
423423

424424
/// Return true if we can prove that all callers pass in a valid pointer for the
425425
/// specified function argument.
426-
static bool allCallersPassValidPointerForArgument(Argument *Arg,
427-
Align NeededAlign,
428-
uint64_t NeededDerefBytes) {
426+
static bool allCallersPassValidPointerForArgument(
427+
Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls,
428+
Align NeededAlign, uint64_t NeededDerefBytes) {
429429
Function *Callee = Arg->getParent();
430430
const DataLayout &DL = Callee->getDataLayout();
431431
APInt Bytes(64, NeededDerefBytes);
@@ -438,6 +438,33 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg,
438438
// direct callees.
439439
return all_of(Callee->users(), [&](User *U) {
440440
CallBase &CB = cast<CallBase>(*U);
441+
// In case of functions with recursive calls, this check
442+
// (isDereferenceableAndAlignedPointer) will fail when it tries to look at
443+
// the first caller of this function. The caller may or may not have a load,
444+
// in case it doesn't load the pointer being passed, this check will fail.
445+
// So, it's safe to skip the check in case we know that we are dealing with a
446+
// recursive call. For example, consider the IR given below.
447+
//
448+
// def fun(ptr %a) {
449+
// ...
450+
// %loadres = load i32, ptr %a, align 4
451+
// %res = call i32 @fun(ptr %a)
452+
// ...
453+
// }
454+
//
455+
// def bar(ptr %x) {
456+
// ...
457+
// %resbar = call i32 @fun(ptr %x)
458+
// ...
459+
// }
460+
//
461+
// Since we record processed recursive calls, we check if the current
462+
// CallBase has been processed before. If yes it means that it is a
463+
// recursive call and we can skip the check just for this call. So, just
464+
// return true.
465+
if (RecursiveCalls.contains(&CB))
466+
return true;
467+
441468
return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
442469
NeededAlign, Bytes, DL);
443470
});
@@ -571,6 +598,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
571598
SmallVector<const Use *, 16> Worklist;
572599
SmallPtrSet<const Use *, 16> Visited;
573600
SmallVector<LoadInst *, 16> Loads;
601+
SmallPtrSet<CallBase *, 4> RecursiveCalls;
574602
auto AppendUses = [&](const Value *V) {
575603
for (const Use &U : V->uses())
576604
if (Visited.insert(&U).second)
@@ -611,6 +639,33 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
611639
// unknown users
612640
}
613641

642+
auto *CB = dyn_cast<CallBase>(V);
643+
Value *PtrArg = cast<Value>(U);
644+
if (CB && PtrArg && CB->getCalledFunction() == CB->getFunction()) {
645+
if (PtrArg != Arg) {
646+
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
647+
<< "pointer offset is not equal to zero\n");
648+
return false;
649+
}
650+
651+
unsigned int ArgNo = Arg->getArgNo();
652+
if (CB->getArgOperand(ArgNo) != Arg || U->getOperandNo() != ArgNo) {
653+
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
654+
<< "arg position is different in callee\n");
655+
return false;
656+
}
657+
658+
// We limit promotion to only promoting up to a fixed number of elements
659+
// of the aggregate.
660+
if (MaxElements > 0 && ArgParts.size() > MaxElements) {
661+
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
662+
<< "more than " << MaxElements << " parts\n");
663+
return false;
664+
}
665+
666+
RecursiveCalls.insert(CB);
667+
continue;
668+
}
614669
// Unknown user.
615670
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
616671
<< "unknown user " << *V << "\n");
@@ -619,7 +674,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
619674

620675
if (NeededDerefBytes || NeededAlign > 1) {
621676
// Try to prove a required deref / aligned requirement.
622-
if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
677+
if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,
623678
NeededDerefBytes)) {
624679
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
625680
<< "not dereferenceable or aligned\n");
@@ -700,6 +755,10 @@ static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
700755
/// calls the DoPromotion method.
701756
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
702757
unsigned MaxElements, bool IsRecursive) {
758+
// Due to complexity of handling cases where the SCC has more than one
759+
// component, we want to limit argument promotion of recursive calls to
760+
// just functions that directly call themselves.
761+
bool IsSelfRecursive = false;
703762
// Don't perform argument promotion for naked functions; otherwise we can end
704763
// up removing parameters that are seemingly 'not used' as they are referred
705764
// to in the assembly.
@@ -745,8 +804,10 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
745804
if (CB->isMustTailCall())
746805
return nullptr;
747806

748-
if (CB->getFunction() == F)
807+
if (CB->getFunction() == F) {
749808
IsRecursive = true;
809+
IsSelfRecursive = true;
810+
}
750811
}
751812

752813
// Can't change signature of musttail caller
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
2+
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
3+
4+
%T = type { i32, i32, i32, i32 }
5+
@G = constant %T { i32 0, i32 0, i32 17, i32 25 }
6+
7+
define internal i32 @test(ptr %p) {
8+
; CHECK-LABEL: define {{[^@]+}}@test
9+
; CHECK-SAME: (i32 [[P_8_VAL:%.*]], i32 [[P_12_VAL:%.*]]) {
10+
; CHECK-NEXT: entry:
11+
; CHECK-NEXT: [[V:%.*]] = add i32 [[P_12_VAL]], [[P_8_VAL]]
12+
; CHECK-NEXT: [[RET:%.*]] = call i32 @test(i32 [[P_8_VAL]], i32 [[P_12_VAL]])
13+
; CHECK-NEXT: [[ARET:%.*]] = add i32 [[V]], [[RET]]
14+
; CHECK-NEXT: ret i32 [[ARET]]
15+
;
16+
entry:
17+
%a.gep = getelementptr %T, ptr %p, i64 0, i32 3
18+
%b.gep = getelementptr %T, ptr %p, i64 0, i32 2
19+
%a = load i32, ptr %a.gep
20+
%b = load i32, ptr %b.gep
21+
%v = add i32 %a, %b
22+
%ret = call i32 @test(ptr %p)
23+
%aret = add i32 %v, %ret
24+
ret i32 %aret
25+
}
26+
27+
define i32 @caller() {
28+
; CHECK-LABEL: define {{[^@]+}}@caller() {
29+
; CHECK-NEXT: entry:
30+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @G, i64 8
31+
; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr [[TMP0]], align 4
32+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr @G, i64 12
33+
; CHECK-NEXT: [[G_VAL1:%.*]] = load i32, ptr [[TMP1]], align 4
34+
; CHECK-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_VAL]], i32 [[G_VAL1]])
35+
; CHECK-NEXT: ret i32 [[V]]
36+
;
37+
entry:
38+
%v = call i32 @test(ptr @G)
39+
ret i32 %v
40+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
3+
define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
4+
; CHECK-LABEL: define internal i32 @foo(
5+
; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
8+
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
9+
; CHECK: [[COND_TRUE]]:
10+
; CHECK-NEXT: br label %[[RETURN:.*]]
11+
; CHECK: [[COND_FALSE]]:
12+
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], 1
13+
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL]], i32 [[X_0_VAL]])
14+
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
15+
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
16+
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
17+
; CHECK-NEXT: br label %[[RETURN]]
18+
; CHECK: [[COND_NEXT:.*]]:
19+
; CHECK-NEXT: br label %[[RETURN]]
20+
; CHECK: [[RETURN]]:
21+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
22+
; CHECK-NEXT: ret i32 [[RETVAL_0]]
23+
;
24+
entry:
25+
%cmp = icmp ne i32 %n, 0
26+
br i1 %cmp, label %cond_true, label %cond_false
27+
28+
cond_true: ; preds = %entry
29+
%val = load i32, ptr %x, align 4
30+
br label %return
31+
32+
cond_false: ; preds = %entry
33+
%val2 = load i32, ptr %x, align 4
34+
%subval = sub i32 %n, 1
35+
%callret = call i32 @foo(ptr %x, i32 %subval, i32 %val2)
36+
%subval2 = sub i32 %n, 2
37+
%callret2 = call i32 @foo(ptr %x, i32 %subval2, i32 %m)
38+
%cmp2 = add i32 %callret, %callret2
39+
br label %return
40+
41+
cond_next: ; No predecessors!
42+
br label %return
43+
44+
return: ; preds = %cond_next, %cond_false, %cond_true
45+
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
46+
ret i32 %retval.0
47+
}
48+
49+
define i32 @bar(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
50+
; CHECK-LABEL: define i32 @bar(
51+
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
52+
; CHECK-NEXT: [[ENTRY:.*:]]
53+
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
54+
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(i32 [[X_VAL]], i32 [[N]], i32 [[M]])
55+
; CHECK-NEXT: br label %[[RETURN:.*]]
56+
; CHECK: [[RETURN]]:
57+
; CHECK-NEXT: ret i32 [[CALLRET3]]
58+
;
59+
entry:
60+
%callret3 = call i32 @foo(ptr %x, i32 %n, i32 %m)
61+
br label %return
62+
63+
return: ; preds = %entry
64+
ret i32 %callret3
65+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
3+
define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
4+
; CHECK-LABEL: define internal i32 @foo(
5+
; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
8+
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
9+
; CHECK: [[COND_TRUE]]:
10+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X]], align 4
11+
; CHECK-NEXT: br label %[[RETURN:.*]]
12+
; CHECK: [[COND_FALSE]]:
13+
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[X]], align 4
14+
; CHECK-NEXT: [[VAL3:%.*]] = load i32, ptr [[Y]], align 4
15+
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], [[VAL3]]
16+
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[SUBVAL]], i32 [[VAL2]])
17+
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
18+
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(ptr [[Y]], ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
19+
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
20+
; CHECK-NEXT: br label %[[RETURN]]
21+
; CHECK: [[COND_NEXT:.*]]:
22+
; CHECK-NEXT: br label %[[RETURN]]
23+
; CHECK: [[RETURN]]:
24+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
25+
; CHECK-NEXT: ret i32 [[RETVAL_0]]
26+
;
27+
entry:
28+
%cmp = icmp ne i32 %n, 0
29+
br i1 %cmp, label %cond_true, label %cond_false
30+
31+
cond_true: ; preds = %entry
32+
%val = load i32, ptr %x, align 4
33+
br label %return
34+
35+
cond_false: ; preds = %entry
36+
%val2 = load i32, ptr %x, align 4
37+
%val3 = load i32, ptr %y, align 4
38+
%subval = sub i32 %n, %val3
39+
%callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
40+
%subval2 = sub i32 %n, 2
41+
%callret2 = call i32 @foo(ptr %y, ptr %x, i32 %subval2, i32 %m)
42+
%cmp2 = add i32 %callret, %callret2
43+
br label %return
44+
45+
cond_next: ; No predecessors!
46+
br label %return
47+
48+
return: ; preds = %cond_next, %cond_false, %cond_true
49+
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
50+
ret i32 %retval.0
51+
}
52+
53+
define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
54+
; CHECK-LABEL: define i32 @bar(
55+
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
56+
; CHECK-NEXT: [[ENTRY:.*:]]
57+
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[N]], i32 [[M]])
58+
; CHECK-NEXT: br label %[[RETURN:.*]]
59+
; CHECK: [[RETURN]]:
60+
; CHECK-NEXT: ret i32 [[CALLRET3]]
61+
;
62+
entry:
63+
%callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
64+
br label %return
65+
66+
return: ; preds = %entry
67+
ret i32 %callret3
68+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
3+
define internal i32 @zoo(ptr %x, i32 %m) {
4+
; CHECK-LABEL: define internal i32 @zoo(
5+
; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[M:%.*]]) {
6+
; CHECK-NEXT: [[RESZOO:%.*]] = add i32 [[X_0_VAL]], [[M]]
7+
; CHECK-NEXT: ret i32 [[X_0_VAL]]
8+
;
9+
%valzoo = load i32, ptr %x, align 4
10+
%reszoo = add i32 %valzoo, %m
11+
ret i32 %valzoo
12+
}
13+
14+
define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
15+
; CHECK-LABEL: define internal i32 @foo(
16+
; CHECK-SAME: ptr [[X:%.*]], i32 [[Y_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
17+
; CHECK-NEXT: [[ENTRY:.*:]]
18+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
19+
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
20+
; CHECK: [[COND_TRUE]]:
21+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X]], align 4
22+
; CHECK-NEXT: br label %[[RETURN:.*]]
23+
; CHECK: [[COND_FALSE]]:
24+
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[X]], align 4
25+
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], [[Y_0_VAL]]
26+
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_0_VAL]], i32 [[SUBVAL]], i32 [[VAL2]])
27+
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
28+
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
29+
; CHECK-NEXT: [[CMP1:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
30+
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
31+
; CHECK-NEXT: [[CALLRETFINAL:%.*]] = call i32 @zoo(i32 [[X_VAL]], i32 [[M]])
32+
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CMP1]], [[CALLRETFINAL]]
33+
; CHECK-NEXT: br label %[[RETURN]]
34+
; CHECK: [[COND_NEXT:.*]]:
35+
; CHECK-NEXT: br label %[[RETURN]]
36+
; CHECK: [[RETURN]]:
37+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
38+
; CHECK-NEXT: ret i32 [[RETVAL_0]]
39+
;
40+
entry:
41+
%cmp = icmp ne i32 %n, 0
42+
br i1 %cmp, label %cond_true, label %cond_false
43+
44+
cond_true: ; preds = %entry
45+
%val = load i32, ptr %x, align 4
46+
br label %return
47+
48+
cond_false: ; preds = %entry
49+
%val2 = load i32, ptr %x, align 4
50+
%val3 = load i32, ptr %y, align 4
51+
%subval = sub i32 %n, %val3
52+
%callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
53+
%subval2 = sub i32 %n, 2
54+
%callret2 = call i32 @foo(ptr %x, ptr %y, i32 %subval2, i32 %m)
55+
%cmp1 = add i32 %callret, %callret2
56+
%callretfinal = call i32 @zoo(ptr %x, i32 %m)
57+
%cmp2 = add i32 %cmp1, %callretfinal
58+
br label %return
59+
60+
cond_next: ; No predecessors!
61+
br label %return
62+
63+
return: ; preds = %cond_next, %cond_false, %cond_true
64+
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
65+
ret i32 %retval.0
66+
}
67+
68+
define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
69+
; CHECK-LABEL: define i32 @bar(
70+
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
71+
; CHECK-NEXT: [[ENTRY:.*:]]
72+
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, ptr [[Y]], align 4
73+
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_VAL]], i32 [[N]], i32 [[M]])
74+
; CHECK-NEXT: br label %[[RETURN:.*]]
75+
; CHECK: [[RETURN]]:
76+
; CHECK-NEXT: ret i32 [[CALLRET3]]
77+
;
78+
entry:
79+
%callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
80+
br label %return
81+
82+
return: ; preds = %entry
83+
ret i32 %callret3
84+
}

0 commit comments

Comments
 (0)