
Commit 9e5a77f

[SeparateConstOffsetFromGEP] Always emit i8 gep
Always emit canonical i8 GEPs; don't try to preserve the original element type. As this is a backend pass, trying to preserve the type is not useful.
1 parent: 08da7ac
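As a sketch of the effect (names are illustrative, not from the commit): where the pass previously re-expressed an extracted constant offset in units of the GEP's element type, it now always emits the byte-addressed i8 form, e.g. for a one-element offset into float data:

; before: offset in element units
%new.gep = getelementptr inbounds float, ptr %p, i64 1
; after: canonical i8 GEP, offset in bytes (1 * sizeof(float) = 4)
%new.gep = getelementptr inbounds i8, ptr %p, i64 4

Both forms compute the same address; only the encoding of the offset changes.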

File tree

9 files changed: +93 −135 lines changed


llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 8 additions & 50 deletions
@@ -1093,67 +1093,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // => add the offset
   //
   // %gep2 ; clone of %gep
-  // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+  // %new.gep = gep i8, %gep2, %offset
   // %gep ; will be removed
   // ... %gep ...
   //
   // => replace all uses of %gep with %new.gep and remove %gep
   //
   // %gep2 ; clone of %gep
-  // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
-  // ... %new.gep ...
-  //
-  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
-  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
-  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
-  // type of %gep.
-  //
-  // %gep2 ; clone of %gep
-  // %0 = bitcast %gep2 to i8*
-  // %uglygep = gep %0, <offset>
-  // %new.gep = bitcast %uglygep to <type of %gep>
+  // %new.gep = gep i8, %gep2, %offset
   // ... %new.gep ...
   Instruction *NewGEP = GEP->clone();
   NewGEP->insertBefore(GEP);
 
-  // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
-  // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
-  // used with unsigned integers later.
-  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
-      DL->getTypeAllocSize(GEP->getResultElementType()));
   Type *PtrIdxTy = DL->getIndexType(GEP->getType());
-  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
-    // Very likely. As long as %gep is naturally aligned, the byte offset we
-    // extracted should be a multiple of sizeof(*%gep).
-    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
-    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
-                                       ConstantInt::get(PtrIdxTy, Index, true),
-                                       GEP->getName(), GEP);
-    NewGEP->copyMetadata(*GEP);
-    // Inherit the inbounds attribute of the original GEP.
-    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
-  } else {
-    // Unlikely but possible. For example,
-    //  #pragma pack(1)
-    //  struct S {
-    //    int a[3];
-    //    int64 b[8];
-    //  };
-    //  #pragma pack()
-    //
-    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
-    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
-    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
-    // sizeof(int64).
-    //
-    // Emit an uglygep in this case.
-    IRBuilder<> Builder(GEP);
-    NewGEP = cast<Instruction>(Builder.CreateGEP(
-        Builder.getInt8Ty(), NewGEP,
-        {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep",
-        GEPWasInBounds));
-    NewGEP->copyMetadata(*GEP);
-  }
+  IRBuilder<> Builder(GEP);
+  NewGEP = cast<Instruction>(Builder.CreateGEP(
+      Builder.getInt8Ty(), NewGEP,
+      {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)},
+      GEP->getName(), GEPWasInBounds));
+  NewGEP->copyMetadata(*GEP);
 
   GEP->replaceAllUsesWith(NewGEP);
   GEP->eraseFromParent();
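This removes the distinction between the aligned case and the old "uglygep" path: the packed-struct example from the deleted comment, where AccumulativeByteOffset = 100 is not a multiple of sizeof(int64), now yields the same canonical form as every other case (a sketch, reusing %gep2 from the comment above):

; byte offset used directly, no divisibility requirement
%new.gep = getelementptr i8, ptr %gep2, i64 100

The single CreateGEP call also passes GEPWasInBounds, so the original GEP's inbounds flag is still inherited, which the deleted branch did explicitly via setIsInBounds.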

llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@ define ptr @test1(ptr %base, i64 %idx) #0 {
 define ptr @test2(ptr %base, i64 %idx) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 3, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT: ret ptr [[GEP2]]
 ;
   %idx.next = add nuw nsw i64 %idx, 1
@@ -57,7 +57,7 @@ define ptr @test4(ptr %base, i64 %idx) {
 define ptr @test5(ptr %base, i64 %idx) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT: ret ptr [[GEP2]]
 ;
   %idx.next = add nuw nsw i64 %idx, 1
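Here and in the test updates below, the new constants follow mechanically from the byte-based form: new offset = old index * element size. For the float GEPs above, an index of 1 becomes 1 * 4 = 4 bytes; in the AMDGPU tests that follow, 32 becomes 128, 255 becomes 1020, and an index of 3 into <4 x i32> elements becomes 3 * 16 = 48.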

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll

Lines changed: 8 additions & 8 deletions
@@ -11,9 +11,9 @@ define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, ptr addrspace(1) nocaptu
 ; IR-NEXT: [[TMP:%.*]] = sext i32 [[Y]] to i64
 ; IR-NEXT: [[TMP1:%.*]] = sext i32 [[X]] to i64
 ; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [32 x float]], ptr addrspace(4) @array, i64 0, i64 [[TMP1]], i64 [[TMP]]
-; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 1
-; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 32
-; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 33
+; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 4
+; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 128
+; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 132
 ; IR-NEXT: store float 0.000000e+00, ptr addrspace(1) [[OUTPUT]], align 4
 ; IR-NEXT: ret void
 ;
@@ -51,7 +51,7 @@ define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, pt
 ; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [4 x float]], ptr addrspace(4) @array2, i64 0, i64 [[TMP1]], i64 [[TMP]]
 ; IR-NEXT: [[TMP6:%.*]] = add i32 [[Y]], 255
 ; IR-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 255
+; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 1020
 ; IR-NEXT: [[TMP12:%.*]] = add i32 [[X]], 256
 ; IR-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
 ; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4096 x [4 x float]], ptr addrspace(4) @array2, i64 0, i64 [[TMP13]], i64 [[TMP]]
@@ -91,13 +91,13 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
 ; IR-NEXT: [[TMP2:%.*]] = getelementptr [4096 x [4 x float]], ptr addrspace(3) @lds_array, i32 0, i32 [[X]], i32 [[Y]]
 ; IR-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(3) [[TMP2]], align 4
 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00
-; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 255
+; IR-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 1020
 ; IR-NEXT: [[TMP10:%.*]] = load float, ptr addrspace(3) [[TMP82]], align 4
 ; IR-NEXT: [[TMP11:%.*]] = fadd float [[TMP5]], [[TMP10]]
-; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 16128
+; IR-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 64512
 ; IR-NEXT: [[TMP16:%.*]] = load float, ptr addrspace(3) [[TMP144]], align 4
 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP11]], [[TMP16]]
-; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 16383
+; IR-NEXT: [[TMP187:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i32 65532
 ; IR-NEXT: [[TMP20:%.*]] = load float, ptr addrspace(3) [[TMP187]], align 4
 ; IR-NEXT: [[TMP21:%.*]] = fadd float [[TMP17]], [[TMP20]]
 ; IR-NEXT: store float [[TMP21]], ptr addrspace(1) [[OUTPUT]], align 4
@@ -134,7 +134,7 @@ define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float,
 ; IR-NEXT: [[TMP27:%.*]] = shl i32 [[TMP23]], 2
 ; IR-NEXT: [[TMP28:%.*]] = sext i32 [[TMP27]] to i64
 ; IR-NEXT: [[TMP29:%.*]] = getelementptr [0 x <4 x i32>], ptr addrspace(4) [[TMP1]], i64 0, i64 [[TMP28]], !amdgpu.uniform [[META0]]
-; IR-NEXT: [[TMP30:%.*]] = getelementptr <4 x i32>, ptr addrspace(4) [[TMP29]], i64 3, !amdgpu.uniform [[META0]]
+; IR-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP29]], i64 48, !amdgpu.uniform [[META0]]
 ; IR-NEXT: [[TMP31:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP30]], align 16, !invariant.load [[META0]]
 ; IR-NEXT: [[TMP32:%.*]] = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> [[TMP26]], <4 x i32> [[TMP31]], i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #[[ATTR3]]
 ; IR-NEXT: [[TMP33:%.*]] = extractelement <4 x float> [[TMP32]], i32 0
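One detail worth noting: in the addrspace(3) hunk above the new byte offsets are i32 rather than i64. The pass takes the offset type from DL->getIndexType(GEP->getType()), and AMDGPU's local (LDS) address space uses a 32-bit index type.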

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn.ll

Lines changed: 13 additions & 13 deletions
@@ -26,15 +26,15 @@ define void @sum_of_array(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I87:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I87:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I9:%.*]] = addrspacecast ptr addrspace(3) [[I87]] to ptr
 ; IR-NEXT: [[I10:%.*]] = load float, ptr [[I9]], align 4
 ; IR-NEXT: [[I11:%.*]] = fadd float [[I5]], [[I10]]
-; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I15:%.*]] = addrspacecast ptr addrspace(3) [[I1412]] to ptr
 ; IR-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4
 ; IR-NEXT: [[I17:%.*]] = fadd float [[I11]], [[I16]]
-; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I19:%.*]] = addrspacecast ptr addrspace(3) [[I1818]] to ptr
 ; IR-NEXT: [[I20:%.*]] = load float, ptr [[I19]], align 4
 ; IR-NEXT: [[I21:%.*]] = fadd float [[I17]], [[I20]]
@@ -88,15 +88,15 @@ define void @sum_of_array2(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I77:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I77:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I8:%.*]] = addrspacecast ptr addrspace(3) [[I77]] to ptr
 ; IR-NEXT: [[I9:%.*]] = load float, ptr [[I8]], align 4
 ; IR-NEXT: [[I10:%.*]] = fadd float [[I5]], [[I9]]
-; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I13:%.*]] = addrspacecast ptr addrspace(3) [[I1212]] to ptr
 ; IR-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4
 ; IR-NEXT: [[I15:%.*]] = fadd float [[I10]], [[I14]]
-; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I17:%.*]] = addrspacecast ptr addrspace(3) [[I1618]] to ptr
 ; IR-NEXT: [[I18:%.*]] = load float, ptr [[I17]], align 4
 ; IR-NEXT: [[I19:%.*]] = fadd float [[I15]], [[I18]]
@@ -149,15 +149,15 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I87:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I87:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I9:%.*]] = addrspacecast ptr addrspace(3) [[I87]] to ptr
 ; IR-NEXT: [[I10:%.*]] = load float, ptr [[I9]], align 4
 ; IR-NEXT: [[I11:%.*]] = fadd float [[I5]], [[I10]]
-; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1412:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I15:%.*]] = addrspacecast ptr addrspace(3) [[I1412]] to ptr
 ; IR-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4
 ; IR-NEXT: [[I17:%.*]] = fadd float [[I11]], [[I16]]
-; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1818:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I19:%.*]] = addrspacecast ptr addrspace(3) [[I1818]] to ptr
 ; IR-NEXT: [[I20:%.*]] = load float, ptr [[I19]], align 4
 ; IR-NEXT: [[I21:%.*]] = fadd float [[I17]], [[I20]]
@@ -209,15 +209,15 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
 ; IR-NEXT: [[I3:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
 ; IR-NEXT: [[I4:%.*]] = load float, ptr [[I3]], align 4
 ; IR-NEXT: [[I5:%.*]] = fadd float [[I4]], 0.000000e+00
-; IR-NEXT: [[I77:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 1
+; IR-NEXT: [[I77:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 4
 ; IR-NEXT: [[I8:%.*]] = addrspacecast ptr addrspace(3) [[I77]] to ptr
 ; IR-NEXT: [[I9:%.*]] = load float, ptr [[I8]], align 4
 ; IR-NEXT: [[I10:%.*]] = fadd float [[I5]], [[I9]]
-; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 32
+; IR-NEXT: [[I1212:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 128
 ; IR-NEXT: [[I13:%.*]] = addrspacecast ptr addrspace(3) [[I1212]] to ptr
 ; IR-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4
 ; IR-NEXT: [[I15:%.*]] = fadd float [[I10]], [[I14]]
-; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[I2]], i32 33
+; IR-NEXT: [[I1618:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[I2]], i32 132
 ; IR-NEXT: [[I17:%.*]] = addrspacecast ptr addrspace(3) [[I1618]] to ptr
 ; IR-NEXT: [[I18:%.*]] = load float, ptr [[I17]], align 4
 ; IR-NEXT: [[I19:%.*]] = fadd float [[I15]], [[I18]]
@@ -270,7 +270,7 @@ define void @reunion(i32 %x, i32 %y, ptr %input) {
 ; IR-NEXT: [[P0:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[I]]
 ; IR-NEXT: [[V0:%.*]] = load float, ptr [[P0]], align 4
 ; IR-NEXT: call void @use(float [[V0]])
-; IR-NEXT: [[P13:%.*]] = getelementptr inbounds float, ptr [[P0]], i64 5
+; IR-NEXT: [[P13:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 20
 ; IR-NEXT: [[V1:%.*]] = load float, ptr [[P13]], align 4
 ; IR-NEXT: call void @use(float [[V1]])
 ; IR-NEXT: ret void
