Skip to content

Commit 59c6d70

Browse files
authored
[CodeGenPrepare] Make sure the instruction retrieved from SunkAddrs is before MemoryInst (#139303)
Function optimizeBlock may optimize a block multiple times. In the first iteration of the loop, MemoryInst1 may generate a sunk instruction and store it into SunkAddrs. In a later iteration of the loop, MemoryInst2 may use the same address and can then reuse the sunk instruction stored in SunkAddrs — but MemoryInst2 may come before MemoryInst1 and the corresponding sunk instruction. To avoid a use-before-def error, we need to find an appropriate insert position for the sunk instruction. Fixes #138208.
1 parent 0eb4bd2 commit 59c6d70

File tree

2 files changed

+80
-5
lines changed

2 files changed

+80
-5
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
57715771
return false;
57725772
}
57735773

5774+
// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5775+
// is the first instruction that will use Addr. So we need to find the first
5776+
// user of Addr in current BB.
5777+
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5778+
Value *SunkAddr) {
5779+
if (Addr->hasOneUse())
5780+
return MemoryInst->getIterator();
5781+
5782+
// We already have a SunkAddr in current BB, but we may need to insert cast
5783+
// instruction after it.
5784+
if (SunkAddr) {
5785+
if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5786+
return std::next(AddrInst->getIterator());
5787+
}
5788+
5789+
// Find the first user of Addr in current BB.
5790+
Instruction *Earliest = MemoryInst;
5791+
for (User *U : Addr->users()) {
5792+
Instruction *UserInst = dyn_cast<Instruction>(U);
5793+
if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5794+
if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5795+
continue;
5796+
if (UserInst->comesBefore(Earliest))
5797+
Earliest = UserInst;
5798+
}
5799+
}
5800+
return Earliest->getIterator();
5801+
}
5802+
57745803
/// Sink addressing mode computation immediate before MemoryInst if doing so
57755804
/// can be done without increasing register pressure. The need for the
57765805
/// register pressure constraint means this can end up being an all or nothing
@@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
58955924
return Modified;
58965925
}
58975926

5898-
// Insert this computation right after this user. Since our caller is
5899-
// scanning from the top of the BB to the bottom, reuse of the expr are
5900-
// guaranteed to happen later.
5901-
IRBuilder<> Builder(MemoryInst);
5902-
59035927
// Now that we determined the addressing expression we want to use and know
59045928
// that we have to sink it into this block. Check to see if we have already
59055929
// done this for some other load/store instr in this block. If so, reuse
@@ -5910,6 +5934,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
59105934

59115935
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
59125936
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5937+
5938+
// The current BB may be optimized multiple times, we can't guarantee the
5939+
// reuse of Addr happens later, call findInsertPos to find an appropriate
5940+
// insert position.
5941+
IRBuilder<> Builder(MemoryInst->getParent(),
5942+
findInsertPos(Addr, MemoryInst, SunkAddr));
5943+
59135944
if (SunkAddr) {
59145945
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
59155946
<< " for " << *MemoryInst << "\n");
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"

declare void @g(ptr)

; Regression test for #138208: %load and %load5 use the same address; %load5 is
; optimized first, %load is optimized later and reuses the same sunk address
; computation instruction. CodeGenPrepare must insert the sunk instruction
; before its earliest user so no use-before-def error is generated.

define void @f(ptr %arg) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr [[ARG:%.*]]) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
; CHECK-NEXT: ret void
;
bb:
  %getelementptr = getelementptr i8, ptr %arg, i64 -64
  %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
  call void @g(ptr %getelementptr)
  br label %bb3

bb3:
  %load = load ptr, ptr %getelementptr, align 8
  %load4 = load i32, ptr %getelementptr1, align 8
  %load5 = load ptr, ptr %getelementptr, align 8
  %add = add i32 1, 0
  %icmp = icmp eq i32 %add, 0
  br i1 %icmp, label %bb7, label %bb7

bb7:
  ret void
}

0 commit comments

Comments
 (0)