Skip to content

Commit dde30a4

Browse files
authored
[CGP] Bail out if (Base|Scaled)Reg does not dominate insert point. (#142949)
(Base|Scaled)Reg may not dominate the chosen insert point if there are multiple uses of the address. Bail out if that is the case; otherwise we would generate invalid IR. In some cases, we could probably adjust the insert point or hoist the (Base|Scaled)Reg instead. Fixes #142830. PR: #142949
1 parent 55e4c6d commit dde30a4

File tree

2 files changed

+87
-2
lines changed

2 files changed

+87
-2
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5945,8 +5945,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
59455945
// The current BB may be optimized multiple times, we can't guarantee the
59465946
// reuse of Addr happens later, call findInsertPos to find an appropriate
59475947
// insert position.
5948-
IRBuilder<> Builder(MemoryInst->getParent(),
5949-
findInsertPos(Addr, MemoryInst, SunkAddr));
5948+
auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5949+
5950+
// TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5951+
if (!SunkAddr) {
5952+
auto &DT = getDT(*MemoryInst->getFunction());
5953+
if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5954+
(AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5955+
return Modified;
5956+
}
5957+
5958+
IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
59505959

59515960
if (SunkAddr) {
59525961
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
3+
4+
target triple = "x86_64-unknown-linux"
5+
6+
declare i1 @cond(float)
7+
8+
; Per its name, this test covers address sinking when the scaled register of
; the address does not dominate the chosen insert point. In the input,
; %iv.next is defined after %gep.shl/%gep.sub but before the two loads. The
; expected output rebuilds the address of %l.0 from %iv.next (times 2, plus a
; constant offset of 6) directly before that load, and the address of %l.1
; from %iv (times 2) directly before the second load.
define void @scaled_reg_does_not_dominate_insert_point(ptr %src) {
9+
; CHECK-LABEL: define void @scaled_reg_does_not_dominate_insert_point(
10+
; CHECK-SAME: ptr [[SRC:%.*]]) {
11+
; CHECK-NEXT: [[BB:.*]]:
12+
; CHECK-NEXT: br label %[[LOOP:.*]]
13+
; CHECK: [[LOOP]]:
14+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
15+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
16+
; CHECK-NEXT: [[SUNKADDR2:%.*]] = mul i64 [[IV_NEXT]], 2
17+
; CHECK-NEXT: [[SUNKADDR3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR2]]
18+
; CHECK-NEXT: [[SUNKADDR4:%.*]] = getelementptr i8, ptr [[SUNKADDR3]], i64 6
19+
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[SUNKADDR4]], align 4
20+
; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 2
21+
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR]]
22+
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[SUNKADDR1]], align 4
23+
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond(float [[L_0]])
24+
; CHECK-NEXT: [[C:%.*]] = call i1 @cond(float [[L_1]])
25+
; CHECK-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT:.*]]
26+
; CHECK: [[EXIT]]:
27+
; CHECK-NEXT: ret void
28+
;
29+
bb:
30+
%gep.base = getelementptr i8, ptr %src, i64 8
31+
br label %loop
32+
33+
loop:
34+
%iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
35+
%iv.shl = shl i64 %iv, 1
36+
%gep.shl = getelementptr i8, ptr %gep.base, i64 %iv.shl
37+
%gep.sub = getelementptr i8, ptr %gep.shl, i64 -8
38+
%iv.next = add i64 %iv, 1
39+
%l.0 = load float, ptr %gep.shl, align 4
40+
%l.1 = load float, ptr %gep.sub, align 4
41+
call i1 @cond(float %l.0)
42+
%c = call i1 @cond(float %l.1)
43+
br i1 %c, label %loop, label %exit
44+
45+
exit:
46+
ret void
47+
}
48+
49+
; Per its name, this test covers a dominator-tree query made after the pass
; has already changed the CFG. In the expected output the select on %cmp is
; expanded into a branch: %z is frozen before the compare, the smin call
; moves into the false-side block, and a phi in the join block replaces the
; select. The store address is then rebuilt as a GEP of %dst by %offset
; directly before the store in the join block.
define void @check_dt_after_modifying_cfg(ptr %dst, i64 %x, i8 %y, i8 %z) {
50+
; CHECK-LABEL: define void @check_dt_after_modifying_cfg(
51+
; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]]) {
52+
; CHECK-NEXT: [[ENTRY:.*]]:
53+
; CHECK-NEXT: [[OFFSET:%.*]] = lshr i64 [[X]], 2
54+
; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i8 [[Z]]
55+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL_FROZEN]], 0
56+
; CHECK-NEXT: br i1 [[CMP]], label %[[SELECT_END:.*]], label %[[SELECT_FALSE_SINK:.*]]
57+
; CHECK: [[SELECT_FALSE_SINK]]:
58+
; CHECK-NEXT: [[SMIN:%.*]] = tail call i8 @llvm.smin.i8(i8 [[Y]], i8 0)
59+
; CHECK-NEXT: br label %[[SELECT_END]]
60+
; CHECK: [[SELECT_END]]:
61+
; CHECK-NEXT: [[SEL:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[SMIN]], %[[SELECT_FALSE_SINK]] ]
62+
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET]]
63+
; CHECK-NEXT: store i8 [[SEL]], ptr [[SUNKADDR]], align 1
64+
; CHECK-NEXT: ret void
65+
;
66+
entry:
67+
%offset = lshr i64 %x, 2
68+
%gep.dst = getelementptr i8, ptr %dst, i64 %offset
69+
%smin = tail call i8 @llvm.smin.i8(i8 %y, i8 0)
70+
%cmp = icmp slt i8 %z, 0
71+
%sel = select i1 %cmp, i8 0, i8 %smin
72+
store i8 %sel, ptr %gep.dst, align 1
73+
ret void
74+
}
75+
76+
; NOTE(review): attribute group #0 is referenced here, but no
; `attributes #0 = { ... }` definition appears in the visible text; confirm
; the reference is intentional or drop it.
declare i8 @llvm.smin.i8(i8, i8) #0

0 commit comments

Comments
 (0)