Skip to content

Commit 8935aca

Browse files
committed
CodeGenPrep: preserve inbounds attribute when sinking GEPs.
Targets can potentially emit more efficient code if they know that address computations never overflow. For example, ILP32 code on AArch64 (which only has 64-bit address computation) can ignore the possibility of overflow when given this extra information.

llvm-svn: 355926
1 parent 08dc382 commit 8935aca

File tree

8 files changed

+53
-29
lines changed

8 files changed

+53
-29
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2073,6 +2073,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
20732073
Value *BaseReg = nullptr;
20742074
Value *ScaledReg = nullptr;
20752075
Value *OriginalValue = nullptr;
2076+
bool InBounds = true;
20762077

20772078
enum FieldName {
20782079
NoField = 0x00,
@@ -2084,6 +2085,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
20842085
MultipleFields = 0xff
20852086
};
20862087

2088+
20872089
ExtAddrMode() = default;
20882090

20892091
void print(raw_ostream &OS) const;
@@ -2102,6 +2104,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
21022104
ScaledReg->getType() != other.ScaledReg->getType())
21032105
return MultipleFields;
21042106

2107+
// Conservatively reject 'inbounds' mismatches.
2108+
if (InBounds != other.InBounds)
2109+
return MultipleFields;
2110+
21052111
// Check each field to see if it differs.
21062112
unsigned Result = NoField;
21072113
if (BaseReg != other.BaseReg)
@@ -2200,6 +2206,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
22002206
void ExtAddrMode::print(raw_ostream &OS) const {
22012207
bool NeedPlus = false;
22022208
OS << "[";
2209+
if (InBounds)
2210+
OS << "inbounds ";
22032211
if (BaseGV) {
22042212
OS << (NeedPlus ? " + " : "")
22052213
<< "GV:";
@@ -3498,6 +3506,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
34983506
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
34993507
if (isa<Instruction>(ScaleReg) && // not a constant expr.
35003508
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3509+
TestAddrMode.InBounds = false;
35013510
TestAddrMode.ScaledReg = AddLHS;
35023511
TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
35033512

@@ -4072,6 +4081,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
40724081
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
40734082
TPT.getRestorationPoint();
40744083

4084+
AddrMode.InBounds = false;
40754085
if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
40764086
matchAddr(AddrInst->getOperand(0), Depth+1))
40774087
return true;
@@ -4098,6 +4108,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
40984108
case Instruction::Mul:
40994109
case Instruction::Shl: {
41004110
// Can only handle X*C and X << C.
4111+
AddrMode.InBounds = false;
41014112
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
41024113
if (!RHS || RHS->getBitWidth() > 64)
41034114
return false;
@@ -4149,8 +4160,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
41494160
if (ConstantOffset == 0 ||
41504161
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
41514162
// Check to see if we can fold the base pointer in too.
4152-
if (matchAddr(AddrInst->getOperand(0), Depth+1))
4163+
if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4164+
if (!cast<GEPOperator>(AddrInst)->isInBounds())
4165+
AddrMode.InBounds = false;
41534166
return true;
4167+
}
41544168
} else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
41554169
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
41564170
ConstantOffset > 0) {
@@ -4186,6 +4200,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
41864200

41874201
// See if the scale and offset amount is valid for this target.
41884202
AddrMode.BaseOffs += ConstantOffset;
4203+
if (!cast<GEPOperator>(AddrInst)->isInBounds())
4204+
AddrMode.InBounds = false;
41894205

41904206
// Match the base operand of the GEP.
41914207
if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4871,7 +4887,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
48714887
// SDAG consecutive load/store merging.
48724888
if (ResultPtr->getType() != I8PtrTy)
48734889
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4874-
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4890+
ResultPtr =
4891+
AddrMode.InBounds
4892+
? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4893+
"sunkaddr")
4894+
: Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
48754895
}
48764896

48774897
ResultIndex = V;
@@ -4882,7 +4902,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
48824902
} else {
48834903
if (ResultPtr->getType() != I8PtrTy)
48844904
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4885-
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4905+
SunkAddr =
4906+
AddrMode.InBounds
4907+
? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4908+
"sunkaddr")
4909+
: Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
48864910
}
48874911

48884912
if (SunkAddr->getType() != Addr->getType())

llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ done:
492492
%struct.foo = type { [3 x float], [3 x float] }
493493

494494
; OPT-LABEL: @sink_ds_address(
495-
; OPT: getelementptr i8,
495+
; OPT: getelementptr inbounds i8,
496496

497497
; GCN-LABEL: {{^}}sink_ds_address:
498498
; GCN: s_load_dword [[SREG1:s[0-9]+]],

llvm/test/CodeGen/Thumb/addr-modes.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ target triple = "thumbv6m-arm-none-eabi"
1414

1515
; Test case 01: %n is scaled by 4 (size of i32).
1616
; Expected: GEP cannot be folded into LOAD.
17-
; CHECK: local addrmode: [Base:%arrayidx]
17+
; CHECK: local addrmode: [inbounds Base:%arrayidx]
1818
define i32 @load01(i32* %p, i32 %n) nounwind {
1919
entry:
2020
%arrayidx = getelementptr inbounds i32, i32* %p, i32 %n
@@ -24,7 +24,7 @@ entry:
2424

2525
; Test case 02: No scale of %n is needed because the size of i8 is 1.
2626
; Expected: GEP can be folded into LOAD.
27-
; CHECK: local addrmode: [Base:%p + 1*%n]
27+
; CHECK: local addrmode: [inbounds Base:%p + 1*%n]
2828
define i8 @load02(i8* %p, i32 %n) nounwind {
2929
entry:
3030
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %n

llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ cl: ; preds = %sw.bb, %entry
5454
; CHECK-NOT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %bw
5555

5656
; CHECK: %[[I1:[0-9]+]] = bitcast %struct.az* %[[I0]] to i8*
57-
; CHECK-NEXT: %sunkaddr = getelementptr i8, i8* %[[I1]], i64 8
57+
; CHECK-NEXT: %sunkaddr = getelementptr inbounds i8, i8* %[[I1]], i64 8
5858
; CHECK-NEXT: %[[I2:[0-9]+]] = bitcast i8* %sunkaddr to %struct.bt**
5959
; CHECK-NEXT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %[[I2]]
6060
; CHECK-NEXT: tail call void (i8*, ...) @a

llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ define void @nophi(i32* %p) {
219219
; CHECK-NEXT: br label [[INDIRECTGOTO]]
220220
; CHECK: indirectgoto:
221221
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
222-
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, i8* [[TMP0]], i64 4
222+
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
223223
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
224224
; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
225225
; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64

llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ if.then:
4141
br label %fallthrough
4242

4343
fallthrough:
44-
; CHECK: getelementptr i8, {{.+}} 40
44+
; CHECK: getelementptr inbounds i8, {{.+}} 40
4545
%b = phi i64* [%b1, %entry], [%b2, %if.then]
4646
%c = phi i32* [%c1, %entry], [%c2, %if.then]
4747
%v = load i32, i32* %c, align 4
@@ -111,7 +111,7 @@ if.then:
111111
br label %fallthrough
112112

113113
fallthrough:
114-
; CHECK: getelementptr i8, {{.+}} 40
114+
; CHECK: getelementptr inbounds i8, {{.+}} 40
115115
%b = phi i64* [%b1, %entry], [%b2, %if.then]
116116
%c = phi i32* [%c1, %entry], [%c2, %if.then]
117117
%v = load i32, i32* %c, align 4
@@ -199,7 +199,7 @@ if.then:
199199
br label %fallthrough
200200

201201
fallthrough:
202-
; CHECK: getelementptr i8, {{.+}} 40
202+
; CHECK: getelementptr inbounds i8, {{.+}} 40
203203
%c = phi i32* [%c3, %loop], [%c2, %if.then]
204204
%b = phi i64* [%b3, %loop], [%b2, %if.then]
205205
%v = load volatile i32, i32* %c, align 4

llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
99
; Can we sink single addressing mode computation to use?
1010
define void @test1(i1 %cond, i64* %base) {
1111
; CHECK-LABEL: @test1
12-
; CHECK: getelementptr i8, {{.+}} 40
12+
; CHECK: getelementptr inbounds i8, {{.+}} 40
1313
entry:
1414
%addr = getelementptr inbounds i64, i64* %base, i64 5
1515
%casted = bitcast i64* %addr to i32*
@@ -35,15 +35,15 @@ entry:
3535

3636
if.then:
3737
; CHECK-LABEL: if.then:
38-
; CHECK: getelementptr i8, {{.+}} 40
38+
; CHECK: getelementptr inbounds i8, {{.+}} 40
3939
%v1 = load i32, i32* %casted, align 4
4040
call void @foo(i32 %v1)
4141
%cmp = icmp eq i32 %v1, 0
4242
br i1 %cmp, label %next, label %fallthrough
4343

4444
next:
4545
; CHECK-LABEL: next:
46-
; CHECK: getelementptr i8, {{.+}} 40
46+
; CHECK: getelementptr inbounds i8, {{.+}} 40
4747
%v2 = load i32, i32* %casted, align 4
4848
call void @foo(i32 %v2)
4949
br label %fallthrough
@@ -63,10 +63,10 @@ entry:
6363

6464
if.then:
6565
; CHECK-LABEL: if.then:
66-
; CHECK: getelementptr i8, {{.+}} 40
66+
; CHECK: getelementptr inbounds i8, {{.+}} 40
6767
%v1 = load i32, i32* %casted, align 4
6868
call void @foo(i32 %v1)
69-
; CHECK-NOT: getelementptr i8, {{.+}} 40
69+
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
7070
%v2 = load i32, i32* %casted, align 4
7171
call void @foo(i32 %v2)
7272
br label %fallthrough
@@ -86,7 +86,7 @@ entry:
8686

8787
if.then:
8888
; CHECK-LABEL: if.then:
89-
; CHECK: getelementptr i8, {{.+}} 40
89+
; CHECK: getelementptr inbounds i8, {{.+}} 40
9090
%v1 = load i32, i32* %casted, align 4
9191
call void @foo(i32 %v1)
9292
%cmp = icmp eq i32 %v1, 0
@@ -97,7 +97,7 @@ fallthrough:
9797

9898
rare.1:
9999
; CHECK-LABEL: rare.1:
100-
; CHECK: getelementptr i8, {{.+}} 40
100+
; CHECK: getelementptr inbounds i8, {{.+}} 40
101101
call void @slowpath(i32 %v1, i32* %casted) cold
102102
br label %fallthrough
103103
}
@@ -106,14 +106,14 @@ rare.1:
106106
define void @test5(i1 %cond, i64* %base) {
107107
; CHECK-LABEL: @test5
108108
entry:
109-
; CHECK: %addr = getelementptr
109+
; CHECK: %addr = getelementptr inbounds
110110
%addr = getelementptr inbounds i64, i64* %base, i64 5
111111
%casted = bitcast i64* %addr to i32*
112112
br i1 %cond, label %if.then, label %fallthrough
113113

114114
if.then:
115115
; CHECK-LABEL: if.then:
116-
; CHECK-NOT: getelementptr i8, {{.+}} 40
116+
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
117117
%v1 = load i32, i32* %casted, align 4
118118
call void @foo(i32 %v1)
119119
%cmp = icmp eq i32 %v1, 0
@@ -138,7 +138,7 @@ entry:
138138

139139
if.then:
140140
; CHECK-LABEL: if.then:
141-
; CHECK-NOT: getelementptr i8, {{.+}} 40
141+
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
142142
%v1 = load i32, i32* %casted, align 4
143143
call void @foo(i32 %v1)
144144
%cmp = icmp eq i32 %v1, 0
@@ -164,15 +164,15 @@ entry:
164164

165165
if.then:
166166
; CHECK-LABEL: if.then:
167-
; CHECK: getelementptr i8, {{.+}} 40
167+
; CHECK: getelementptr inbounds i8, {{.+}} 40
168168
%v1 = load i32, i32* %casted, align 4
169169
call void @foo(i32 %v1)
170170
%cmp = icmp eq i32 %v1, 0
171171
br i1 %cmp, label %rare.1, label %next
172172

173173
next:
174174
; CHECK-LABEL: next:
175-
; CHECK: getelementptr i8, {{.+}} 40
175+
; CHECK: getelementptr inbounds i8, {{.+}} 40
176176
%v2 = load i32, i32* %casted, align 4
177177
call void @foo(i32 %v2)
178178
%cmp2 = icmp eq i32 %v2, 0
@@ -183,13 +183,13 @@ fallthrough:
183183

184184
rare.1:
185185
; CHECK-LABEL: rare.1:
186-
; CHECK: getelementptr i8, {{.+}} 40
186+
; CHECK: getelementptr inbounds i8, {{.+}} 40
187187
call void @slowpath(i32 %v1, i32* %casted) cold
188188
br label %next
189189

190190
rare.2:
191191
; CHECK-LABEL: rare.2:
192-
; CHECK: getelementptr i8, {{.+}} 40
192+
; CHECK: getelementptr inbounds i8, {{.+}} 40
193193
call void @slowpath(i32 %v2, i32* %casted) cold
194194
br label %fallthrough
195195
}
@@ -240,7 +240,7 @@ if.then:
240240

241241
backedge:
242242
; CHECK-LABEL: backedge:
243-
; CHECK: getelementptr i8, {{.+}} 40
243+
; CHECK: getelementptr inbounds i8, {{.+}} 40
244244
%casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
245245
%v = load i32, i32* %casted.merged, align 4
246246
call void @foo(i32 %v)
@@ -256,7 +256,7 @@ exit:
256256
; address computation.
257257
define void @test10(i1 %cond, i64* %base) {
258258
; CHECK-LABEL: @test10
259-
; CHECK: getelementptr i8, {{.+}} 40
259+
; CHECK: getelementptr inbounds i8, {{.+}} 40
260260
; CHECK-NOT: select
261261
entry:
262262
%gep1 = getelementptr inbounds i64, i64* %base, i64 5

llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
66

77
; CHECK-LABEL: @load_cast_gep
88
; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
9-
; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 40
9+
; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 40
1010
define void @load_cast_gep(i1 %cond, i64* %base) {
1111
entry:
1212
%addr = getelementptr inbounds i64, i64* %base, i64 5
@@ -23,7 +23,7 @@ fallthrough:
2323

2424
; CHECK-LABEL: @store_gep_cast
2525
; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
26-
; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 20
26+
; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 20
2727
define void @store_gep_cast(i1 %cond, i64* %base) {
2828
entry:
2929
%casted = addrspacecast i64* %base to i32 addrspace(1)*

0 commit comments

Comments
 (0)