Skip to content

Commit a736e2b

Browse files
committed
[SeparateConstOffsetFromGEP] Don't set unsound inbounds flag
The language reference says about inbounds geps that "if the getelementptr has any non-zero indices[...] [t]he base pointer has an in bounds address of the allocated object that it is based on [and] [d]uring the successive addition of offsets to the address, the resulting pointer must remain in bounds of the allocated object at each step." If (gep inbounds p, (a + 5)) is translated to (gep [inbounds] (gep p, a), 5) with p pointing to the beginning of an object and a=-4, as the example in the comments suggests, these in-bounds requirements hold for neither of the resulting geps: the inner gep steps to an address before the start of the object, and the outer gep therefore starts from a base pointer that is not in bounds. Therefore, we need to clear the inbounds flag for both geps. We might want to use ValueTracking to check if a is known to be non-negative to preserve the inbounds flags. For the AMDGPU tests with scratch instructions, removing the unsound inbounds flag means that AMDGPUDAGToDAGISel::isFlatScratchBaseLegal sees no NUW flag at the pointer add, which prevents generation of scratch instructions with immediate offsets. It's not clear to me what test/CodeGen/AMDGPU/constant-address-space-32bit.ll is meant to test, or whether my fix preserves what it checks. For SWDEV-516125.
1 parent 9b1ce47 commit a736e2b

File tree

14 files changed

+464
-330
lines changed

14 files changed

+464
-330
lines changed

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,7 +1092,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
10921092
// is transformed to:
10931093
//
10941094
// addr2 = gep float, float* p, i64 a ; inbounds removed
1095-
// addr = gep inbounds float, float* addr2, i64 5
1095+
// addr = gep float, float* addr2, i64 5 ; inbounds removed
10961096
//
10971097
// If a is -4, although the old index b is in bounds, the new index a is
10981098
// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -1103,7 +1103,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
11031103
// TODO(jingyue): do some range analysis to keep as many inbounds as
11041104
// possible. GEPs with inbounds are more friendly to alias analysis.
11051105
// TODO(gep_nowrap): Preserve nuw at least.
1106-
bool GEPWasInBounds = GEP->isInBounds();
1106+
auto NewGEPFlags = GEPNoWrapFlags::none();
11071107
GEP->setNoWrapFlags(GEPNoWrapFlags::none());
11081108

11091109
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
@@ -1153,7 +1153,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
11531153
IRBuilder<> Builder(GEP);
11541154
NewGEP = cast<Instruction>(Builder.CreatePtrAdd(
11551155
NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true),
1156-
GEP->getName(), GEPWasInBounds));
1156+
GEP->getName(), NewGEPFlags));
11571157
NewGEP->copyMetadata(*GEP);
11581158

11591159
GEP->replaceAllUsesWith(NewGEP);

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 208 additions & 144 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ define amdgpu_vs i32 @load_i32_hifffffff0(ptr addrspace(6) inreg %p) #4 {
228228
; GCN: v_readfirstlane_b32
229229
; SI: s_nop
230230
; GCN: s_load_dwordx8
231+
; GCN-NEXT: v_readfirstlane_b32
232+
; SI-NEXT: v_mov_b32_e32
233+
; SI-NEXT: s_nop
231234
; GCN-NEXT: s_load_dwordx4
232235
; GCN: image_sample
233236
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@@ -260,6 +263,9 @@ main_body:
260263
; GCN: v_readfirstlane_b32
261264
; SI: s_nop
262265
; GCN: s_load_dwordx8
266+
; GCN-NEXT: v_readfirstlane_b32
267+
; SI-NEXT: v_mov_b32_e32
268+
; SI-NEXT: s_nop
263269
; GCN-NEXT: s_load_dwordx4
264270
; GCN: image_sample
265271
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 132 additions & 100 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/memory_clause.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,20 +208,26 @@ define void @mubuf_clause(ptr addrspace(5) noalias nocapture readonly %arg, ptr
208208
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31
209209
; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2
210210
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18
211+
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v6, 16, v0
212+
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v10, 32, v0
213+
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v14, 48, v0
211214
; GCN-SCRATCH-NEXT: s_clause 0x3
212215
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[2:5], v0, off
213-
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v0, off offset:16
214-
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v0, off offset:32
215-
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v0, off offset:48
216+
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[6:9], v6, off
217+
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[10:13], v10, off
218+
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[14:17], v14, off
216219
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v1, v18
220+
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v1, 16, v0
221+
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v18, 32, v0
222+
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v19, 48, v0
217223
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(3)
218224
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[2:5], off
219225
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(2)
220-
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16
226+
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v1, v[6:9], off
221227
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(1)
222-
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32
228+
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v18, v[10:13], off
223229
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0)
224-
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v0, v[14:17], off offset:48
230+
; GCN-SCRATCH-NEXT: scratch_store_dwordx4 v19, v[14:17], off
225231
; GCN-SCRATCH-NEXT: s_setpc_b64 s[30:31]
226232
bb:
227233
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=separate-const-offset-from-gep -S | FileCheck %s
3+
4+
; The inbounds flags cannot be preserved here: If the pointers point to the
5+
; beginning of an object and %i is 1, the intermediate GEPs are out of bounds.
6+
define void @maybe_oob(ptr %dst, ptr %src, i64 %i) {
7+
; CHECK-LABEL: @maybe_oob(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[IDX1:%.*]] = sub i64 0, [[I:%.*]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[SRC:%.*]], i64 [[IDX1]]
11+
; CHECK-NEXT: [[ARRAYIDX_SRC2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
12+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
13+
; CHECK-NEXT: [[IDX3:%.*]] = sub i64 0, [[I]]
14+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[IDX3]]
15+
; CHECK-NEXT: [[ARRAYIDX_DST4:%.*]] = getelementptr i8, ptr [[TMP2]], i64 4
16+
; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
17+
; CHECK-NEXT: ret void
18+
;
19+
entry:
20+
%idx = sub nsw i64 1, %i
21+
%arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
22+
%3 = load float, ptr %arrayidx.src, align 4
23+
%arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
24+
store float %3, ptr %arrayidx.dst, align 4
25+
ret void
26+
}

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, ptr addrspace(1) nocaptu
1111
; IR-NEXT: [[TMP:%.*]] = sext i32 [[Y]] to i64
1212
; IR-NEXT: [[TMP1:%.*]] = sext i32 [[X]] to i64
1313
; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [32 x float]], ptr addrspace(4) @array, i64 0, i64 [[TMP1]], i64 [[TMP]]
14-
; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 4
15-
; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 128
16-
; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 132
14+
; IR-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i64 4
15+
; IR-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i64 128
16+
; IR-NEXT: [[TMP187:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i64 132
1717
; IR-NEXT: store float 0.000000e+00, ptr addrspace(1) [[OUTPUT]], align 4
1818
; IR-NEXT: ret void
1919
;
@@ -51,7 +51,7 @@ define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, pt
5151
; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [4 x float]], ptr addrspace(4) @array2, i64 0, i64 [[TMP1]], i64 [[TMP]]
5252
; IR-NEXT: [[TMP6:%.*]] = add i32 [[Y]], 255
5353
; IR-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
54-
; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 1020
54+
; IR-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i64 1020
5555
; IR-NEXT: [[TMP12:%.*]] = add i32 [[X]], 256
5656
; IR-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
5757
; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4096 x [4 x float]], ptr addrspace(4) @array2, i64 0, i64 [[TMP13]], i64 [[TMP]]
@@ -91,13 +91,13 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
9191
; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [4 x float]], ptr addrspace(3) @lds_array, i32 0, i32 [[X]], i32 [[Y]]
9292
; IR-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(3) [[TMP2]], align 4
9393
; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
94-
; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 1020
94+
; IR-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 1020
9595
; IR-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(3) [[TMP82]], align 4
9696
; IR-NEXT: [[TMP11:%.*]] = fadd float [[TMP5]], [[TMP10]]
97-
; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 64512
97+
; IR-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 64512
9898
; IR-NEXT: [[TMP16:%.*]] = load float, ptr addrspace(3) [[TMP144]], align 4
9999
; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP11]], [[TMP16]]
100-
; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 65532
100+
; IR-NEXT: [[TMP187:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i32 65532
101101
; IR-NEXT: [[TMP20:%.*]] = load float, ptr addrspace(3) [[TMP187]], align 4
102102
; IR-NEXT: [[TMP21:%.*]] = fadd float [[TMP17]], [[TMP20]]
103103
; IR-NEXT: store float [[TMP21]], ptr addrspace(1) [[OUTPUT]], align 4

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@ define void @sum_of_array(i32 %x, i32 %y, ptr nocapture %output) {
2626
; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
2727
; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
2828
; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
29-
; IR-NEXT: [[I87:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
29+
; IR-NEXT: [[I87:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 4
3030
; IR-NEXT: [[I9:%.*]] = addrspacecast ptr addrspace(3) [[I87]] to ptr
3131
; IR-NEXT: [[I10:%.*]] = load float, ptr [[I9]], align 4
3232
; IR-NEXT: [[I11:%.*]] = fadd float [[I5]], [[I10]]
33-
; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
33+
; IR-NEXT: [[I1412:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 128
3434
; IR-NEXT: [[I15:%.*]] = addrspacecast ptr addrspace(3) [[I1412]] to ptr
3535
; IR-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4
3636
; IR-NEXT: [[I17:%.*]] = fadd float [[I11]], [[I16]]
37-
; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
37+
; IR-NEXT: [[I1818:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 132
3838
; IR-NEXT: [[I19:%.*]] = addrspacecast ptr addrspace(3) [[I1818]] to ptr
3939
; IR-NEXT: [[I20:%.*]] = load float, ptr [[I19]], align 4
4040
; IR-NEXT: [[I21:%.*]] = fadd float [[I17]], [[I20]]
@@ -88,15 +88,15 @@ define void @sum_of_array2(i32 %x, i32 %y, ptr nocapture %output) {
8888
; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
8989
; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
9090
; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
91-
; IR-NEXT: [[I77:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
91+
; IR-NEXT: [[I77:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 4
9292
; IR-NEXT: [[I8:%.*]] = addrspacecast ptr addrspace(3) [[I77]] to ptr
9393
; IR-NEXT: [[I9:%.*]] = load float, ptr [[I8]], align 4
9494
; IR-NEXT: [[I10:%.*]] = fadd float [[I5]], [[I9]]
95-
; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
95+
; IR-NEXT: [[I1212:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 128
9696
; IR-NEXT: [[I13:%.*]] = addrspacecast ptr addrspace(3) [[I1212]] to ptr
9797
; IR-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4
9898
; IR-NEXT: [[I15:%.*]] = fadd float [[I10]], [[I14]]
99-
; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
99+
; IR-NEXT: [[I1618:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 132
100100
; IR-NEXT: [[I17:%.*]] = addrspacecast ptr addrspace(3) [[I1618]] to ptr
101101
; IR-NEXT: [[I18:%.*]] = load float, ptr [[I17]], align 4
102102
; IR-NEXT: [[I19:%.*]] = fadd float [[I15]], [[I18]]
@@ -149,15 +149,15 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
149149
; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
150150
; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
151151
; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
152-
; IR-NEXT: [[I87:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
152+
; IR-NEXT: [[I87:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 4
153153
; IR-NEXT: [[I9:%.*]] = addrspacecast ptr addrspace(3) [[I87]] to ptr
154154
; IR-NEXT: [[I10:%.*]] = load float, ptr [[I9]], align 4
155155
; IR-NEXT: [[I11:%.*]] = fadd float [[I5]], [[I10]]
156-
; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
156+
; IR-NEXT: [[I1412:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 128
157157
; IR-NEXT: [[I15:%.*]] = addrspacecast ptr addrspace(3) [[I1412]] to ptr
158158
; IR-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4
159159
; IR-NEXT: [[I17:%.*]] = fadd float [[I11]], [[I16]]
160-
; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
160+
; IR-NEXT: [[I1818:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 132
161161
; IR-NEXT: [[I19:%.*]] = addrspacecast ptr addrspace(3) [[I1818]] to ptr
162162
; IR-NEXT: [[I20:%.*]] = load float, ptr [[I19]], align 4
163163
; IR-NEXT: [[I21:%.*]] = fadd float [[I17]], [[I20]]
@@ -209,15 +209,15 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
209209
; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
210210
; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
211211
; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
212-
; IR-NEXT: [[I77:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
212+
; IR-NEXT: [[I77:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 4
213213
; IR-NEXT: [[I8:%.*]] = addrspacecast ptr addrspace(3) [[I77]] to ptr
214214
; IR-NEXT: [[I9:%.*]] = load float, ptr [[I8]], align 4
215215
; IR-NEXT: [[I10:%.*]] = fadd float [[I5]], [[I9]]
216-
; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
216+
; IR-NEXT: [[I1212:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 128
217217
; IR-NEXT: [[I13:%.*]] = addrspacecast ptr addrspace(3) [[I1212]] to ptr
218218
; IR-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4
219219
; IR-NEXT: [[I15:%.*]] = fadd float [[I10]], [[I14]]
220-
; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
220+
; IR-NEXT: [[I1618:%.*]] = getelementptr i8, ptr addrspace(3) [[I2]], i32 132
221221
; IR-NEXT: [[I17:%.*]] = addrspacecast ptr addrspace(3) [[I1618]] to ptr
222222
; IR-NEXT: [[I18:%.*]] = load float, ptr [[I17]], align 4
223223
; IR-NEXT: [[I19:%.*]] = fadd float [[I15]], [[I18]]
@@ -270,7 +270,7 @@ define void @reunion(i32 %x, i32 %y, ptr %input) {
270270
; IR-NEXT: [[P0:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[I]]
271271
; IR-NEXT: [[V0:%.*]] = load float, ptr [[P0]], align 4
272272
; IR-NEXT: call void @use(float [[V0]])
273-
; IR-NEXT: [[P13:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 20
273+
; IR-NEXT: [[P13:%.*]] = getelementptr i8, ptr [[P0]], i64 20
274274
; IR-NEXT: [[V1:%.*]] = load float, ptr [[P13]], align 4
275275
; IR-NEXT: call void @use(float [[V1]])
276276
; IR-NEXT: ret void

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define ptr addrspace(3) @packed_struct(i32 %i, i32 %j) {
1414
; CHECK-NEXT: entry:
1515
; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 0 to i32
1616
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [1024 x %struct.Packed], ptr addrspace(3) @packed_struct_array, i32 [[IDXPROM]], i32 [[I:%.*]], i32 1, i32 [[J:%.*]]
17-
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP0]], i32 100
17+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 100
1818
; CHECK-NEXT: ret ptr addrspace(3) [[UGLYGEP]]
1919
;
2020
entry:

0 commit comments

Comments
 (0)