Skip to content

Commit 9bc4355

Browse files
committed
[SLSR] Always generate i8 GEPs
Always generate canonical i8 GEPs. Trying to generate a "nice" GEP representation is not useful, especially because this is a backend pass.
1 parent 76482b7 commit 9bc4355

File tree

4 files changed

+34
-62
lines changed

4 files changed

+34
-62
lines changed

llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp

Lines changed: 10 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -233,13 +233,9 @@ class StraightLineStrengthReduce {
233233
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
234234
GetElementPtrInst *GEP);
235235

236-
// Emit code that computes the "bump" from Basis to C. If the candidate is a
237-
// GEP and the bump is not divisible by the element size of the GEP, this
238-
// function sets the BumpWithUglyGEP flag to notify its caller to bump the
239-
// basis using an ugly GEP.
236+
// Emit code that computes the "bump" from Basis to C.
240237
static Value *emitBump(const Candidate &Basis, const Candidate &C,
241-
IRBuilder<> &Builder, const DataLayout *DL,
242-
bool &BumpWithUglyGEP);
238+
IRBuilder<> &Builder, const DataLayout *DL);
243239

244240
const DataLayout *DL = nullptr;
245241
DominatorTree *DT = nullptr;
@@ -581,26 +577,11 @@ static void unifyBitWidth(APInt &A, APInt &B) {
581577
Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
582578
const Candidate &C,
583579
IRBuilder<> &Builder,
584-
const DataLayout *DL,
585-
bool &BumpWithUglyGEP) {
580+
const DataLayout *DL) {
586581
APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
587582
unifyBitWidth(Idx, BasisIdx);
588583
APInt IndexOffset = Idx - BasisIdx;
589584

590-
BumpWithUglyGEP = false;
591-
if (Basis.CandidateKind == Candidate::GEP) {
592-
APInt ElementSize(
593-
IndexOffset.getBitWidth(),
594-
DL->getTypeAllocSize(
595-
cast<GetElementPtrInst>(Basis.Ins)->getResultElementType()));
596-
APInt Q, R;
597-
APInt::sdivrem(IndexOffset, ElementSize, Q, R);
598-
if (R == 0)
599-
IndexOffset = Q;
600-
else
601-
BumpWithUglyGEP = true;
602-
}
603-
604585
// Compute Bump = C - Basis = (i' - i) * S.
605586
// Common case 1: if (i' - i) is 1, Bump = S.
606587
if (IndexOffset == 1)
@@ -645,8 +626,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
645626
return;
646627

647628
IRBuilder<> Builder(C.Ins);
648-
bool BumpWithUglyGEP;
649-
Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
629+
Value *Bump = emitBump(Basis, C, Builder, DL);
650630
Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
651631
switch (C.CandidateKind) {
652632
case Candidate::Add:
@@ -673,28 +653,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
673653
}
674654
break;
675655
}
676-
case Candidate::GEP:
677-
{
678-
Type *OffsetTy = DL->getIndexType(C.Ins->getType());
656+
case Candidate::GEP: {
679657
bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
680-
if (BumpWithUglyGEP) {
681-
// C = (char *)Basis + Bump
682-
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
683-
Type *CharTy = PointerType::get(Basis.Ins->getContext(), AS);
684-
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
685-
Reduced =
686-
Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
687-
Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
688-
} else {
689-
// C = gep Basis, Bump
690-
// Canonicalize bump to pointer size.
691-
Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy);
692-
Reduced = Builder.CreateGEP(
693-
cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins,
694-
Bump, "", InBounds);
695-
}
696-
break;
697-
}
658+
// C = (char *)Basis + Bump
659+
Reduced =
660+
Builder.CreateGEP(Builder.getInt8Ty(), Basis.Ins, Bump, "", InBounds);
661+
break;
662+
}
698663
default:
699664
llvm_unreachable("C.CandidateKind is invalid");
700665
};

llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(p
1313
; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP1]], i64 1023
1414
; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(1) [[P12]], align 4
1515
; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4
16-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i64 [[TMP0]]
17-
; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP2]], i64 1023
16+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 2
17+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
18+
; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP3]], i64 1023
1819
; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(1) [[P24]], align 4
1920
; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4
2021
; CHECK-NEXT: ret void
@@ -78,8 +79,9 @@ define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(ptr add
7879
; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP0]], i32 16383
7980
; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(3) [[P12]], align 4
8081
; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4
81-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(3) [[TMP0]], i32 [[I]]
82-
; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i32 16383
82+
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[I]], 2
83+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 [[TMP1]]
84+
; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 16383
8385
; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(3) [[P24]], align 4
8486
; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4
8587
; CHECK-NEXT: ret void

llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,17 @@ define void @slsr_after_reassociate_geps(ptr %arr, i32 %i) {
3535
; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 5
3636
; CHECK-NEXT: [[V1:%.*]] = load float, ptr [[P12]], align 4
3737
; CHECK-NEXT: call void @foo(float [[V1]])
38-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[TMP2]], i64 [[TMP1]]
39-
; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 5
38+
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 2
39+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP3]]
40+
; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 5
4041
; CHECK-NEXT: [[V2:%.*]] = load float, ptr [[P24]], align 4
4142
; CHECK-NEXT: call void @foo(float [[V2]])
42-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i64 [[TMP1]]
43-
; CHECK-NEXT: [[P36:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 5
43+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP3]]
44+
; CHECK-NEXT: [[P36:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 5
4445
; CHECK-NEXT: [[V3:%.*]] = load float, ptr [[P36]], align 4
4546
; CHECK-NEXT: call void @foo(float [[V3]])
46-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i64 [[TMP1]]
47-
; CHECK-NEXT: [[P48:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 5
47+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 [[TMP3]]
48+
; CHECK-NEXT: [[P48:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 5
4849
; CHECK-NEXT: [[V4:%.*]] = load float, ptr [[P48]], align 4
4950
; CHECK-NEXT: call void @foo(float [[V4]])
5051
; CHECK-NEXT: ret void

llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ define void @slsr_gep(ptr %input, i64 %s) {
1919
; CHECK-NEXT: call void @foo(ptr [[INPUT]])
2020
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i32, ptr [[INPUT]], i64 [[S]]
2121
; CHECK-NEXT: call void @foo(ptr [[P1]])
22-
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[S]]
22+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[S]], 2
23+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
2324
; CHECK-NEXT: call void @foo(ptr [[P2]])
2425
; CHECK-NEXT: ret void
2526
;
@@ -55,7 +56,8 @@ define void @slsr_gep_sext(ptr %input, i32 %s) {
5556
; CHECK-NEXT: [[T:%.*]] = sext i32 [[S]] to i64
5657
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i32, ptr [[INPUT]], i64 [[T]]
5758
; CHECK-NEXT: call void @foo(ptr [[P1]])
58-
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[T]]
59+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[T]], 2
60+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
5961
; CHECK-NEXT: call void @foo(ptr [[P2]])
6062
; CHECK-NEXT: ret void
6163
;
@@ -92,10 +94,10 @@ define void @slsr_gep_2d(ptr %input, i64 %s, i64 %t) {
9294
; CHECK-SAME: ptr [[INPUT:%.*]], i64 [[S:%.*]], i64 [[T:%.*]]) {
9395
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds [10 x [5 x i32]], ptr [[INPUT]], i64 0, i64 [[S]], i64 [[T]]
9496
; CHECK-NEXT: call void @foo(ptr [[P0]])
95-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[S]], 5
96-
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 [[TMP1]]
97+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[S]], 20
98+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 [[TMP1]]
9799
; CHECK-NEXT: call void @foo(ptr [[P1]])
98-
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP1]]
100+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
99101
; CHECK-NEXT: call void @foo(ptr [[P2]])
100102
; CHECK-NEXT: ret void
101103
;
@@ -160,7 +162,8 @@ define void @slsr_out_of_bounds_gep(ptr %input, i32 %s) {
160162
; CHECK-NEXT: [[T:%.*]] = sext i32 [[S]] to i64
161163
; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[INPUT]], i64 [[T]]
162164
; CHECK-NEXT: call void @foo(ptr [[P1]])
163-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[T]]
165+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[T]], 2
166+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
164167
; CHECK-NEXT: call void @foo(ptr [[P2]])
165168
; CHECK-NEXT: ret void
166169
;
@@ -238,7 +241,8 @@ define void @slsr_gep_fat_pointer(ptr addrspace(2) %input, i32 %s) {
238241
; CHECK-SAME: ptr addrspace(2) [[INPUT:%.*]], i32 [[S:%.*]]) {
239242
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[INPUT]], i32 [[S]]
240243
; CHECK-NEXT: call void @baz2(ptr addrspace(2) [[P1]])
241-
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[P1]], i32 [[S]]
244+
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[S]], 2
245+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[P1]], i32 [[TMP1]]
242246
; CHECK-NEXT: call void @baz2(ptr addrspace(2) [[P2]])
243247
; CHECK-NEXT: ret void
244248
;

0 commit comments

Comments
 (0)