Skip to content

Commit 352a836

Browse files
authored
[InstCombine] Canonicalize non-i8 gep of mul to i8 (#96606)
This is a small canonicalization for `gep i32, p, (mul x, C)` -> `gep i8, p, (mul x, C*4)`, so that the two constant multiplications can be folded into a single mul, and we take a small step towards canonicalizing more geps to i8. It currently doesn't attempt to check for multiple uses of the mul, but that could be added if it sounds better. Let me know what you think of the idea in general.
1 parent e24a212 commit 352a836

14 files changed

+550
-438
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,9 +2787,16 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
27872787
GEP.getNoWrapFlags()));
27882788
}
27892789

2790-
// Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
2791-
// intrinsic. This has better support in BasicAA.
2792-
if (GEPEltType->isScalableTy()) {
2790+
// Canonicalize
2791+
// - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
2792+
// This has better support in BasicAA.
2793+
// - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
2794+
// multiplies together.
2795+
if (GEPEltType->isScalableTy() ||
2796+
(!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
2797+
match(GEP.getOperand(1),
2798+
m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
2799+
m_Shl(m_Value(), m_ConstantInt())))))) {
27932800
Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
27942801
return replaceInstUsesWith(
27952802
GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
define ptr @mul4(ptr %p, i64 %x) {
5+
; CHECK-LABEL: define ptr @mul4(
6+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[GEP_IDX:%.*]] = shl i64 [[X]], 4
9+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[GEP_IDX]]
10+
; CHECK-NEXT: ret ptr [[GEP]]
11+
;
12+
entry:
13+
%mul = mul i64 %x, 4
14+
%gep = getelementptr inbounds i32, ptr %p, i64 %mul
15+
ret ptr %gep
16+
}
17+
18+
define ptr @mul5(ptr %p, i64 %x) {
19+
; CHECK-LABEL: define ptr @mul5(
20+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
21+
; CHECK-NEXT: entry:
22+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 20
23+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[GEP_IDX]]
24+
; CHECK-NEXT: ret ptr [[GEP]]
25+
;
26+
entry:
27+
%mul = mul i64 %x, 5
28+
%gep = getelementptr inbounds i32, ptr %p, i64 %mul
29+
ret ptr %gep
30+
}
31+
32+
define ptr @noinbounds(ptr %p, i64 %x) {
33+
; CHECK-LABEL: define ptr @noinbounds(
34+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
35+
; CHECK-NEXT: entry:
36+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 20
37+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[GEP_IDX]]
38+
; CHECK-NEXT: ret ptr [[GEP]]
39+
;
40+
entry:
41+
%mul = mul i64 %x, 5
42+
%gep = getelementptr i32, ptr %p, i64 %mul
43+
ret ptr %gep
44+
}
45+
46+
define ptr @usemul(ptr %p, i64 %x) {
47+
; CHECK-LABEL: define ptr @usemul(
48+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
49+
; CHECK-NEXT: entry:
50+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[X]], 5
51+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[MUL]]
52+
; CHECK-NEXT: call void @use(i64 [[MUL]])
53+
; CHECK-NEXT: ret ptr [[GEP]]
54+
;
55+
entry:
56+
%mul = mul i64 %x, 5
57+
%gep = getelementptr inbounds i32, ptr %p, i64 %mul
58+
call void @use(i64 %mul)
59+
ret ptr %gep
60+
}
61+
62+
define void @multiple(ptr %p, i64 %x) {
63+
; CHECK-LABEL: define void @multiple(
64+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
65+
; CHECK-NEXT: entry:
66+
; CHECK-NEXT: [[MUL21:%.*]] = mul i64 [[X]], 20
67+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL21]]
68+
; CHECK-NEXT: [[MUL20:%.*]] = mul i64 [[X]], 20
69+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL20]]
70+
; CHECK-NEXT: call void @use2(ptr [[GEP3]], ptr [[GEP2]])
71+
; CHECK-NEXT: ret void
72+
;
73+
entry:
74+
%mul5 = mul i64 %x, 5
75+
%gep1 = getelementptr inbounds i32, ptr %p, i64 %mul5
76+
%mul20 = mul i64 %x, 20
77+
%gep2 = getelementptr inbounds i8, ptr %p, i64 %mul20
78+
call void @use2(ptr %gep1, ptr %gep2)
79+
ret void
80+
}
81+
82+
define void @multiplestore(ptr %p, i64 %x) {
83+
; CHECK-LABEL: define void @multiplestore(
84+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
85+
; CHECK-NEXT: entry:
86+
; CHECK-NEXT: [[MUL20:%.*]] = mul i64 [[X]], 20
87+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL20]]
88+
; CHECK-NEXT: [[MUL21:%.*]] = mul i64 [[X]], 20
89+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL21]]
90+
; CHECK-NEXT: store i32 0, ptr [[GEP2]], align 4
91+
; CHECK-NEXT: store i32 1, ptr [[GEP3]], align 4
92+
; CHECK-NEXT: ret void
93+
;
94+
entry:
95+
%mul5 = mul i64 %x, 5
96+
%gep1 = getelementptr inbounds i32, ptr %p, i64 %mul5
97+
%mul20 = mul i64 %x, 20
98+
%gep2 = getelementptr inbounds i8, ptr %p, i64 %mul20
99+
store i32 0, ptr %gep1
100+
store i32 1, ptr %gep2
101+
ret void
102+
}
103+
104+
declare void @use(i64)
105+
declare void @use2(ptr, ptr)

llvm/test/Transforms/InstCombine/getelementptr.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,8 +1014,8 @@ define i8 @test_gep_bitcast_as1(ptr addrspace(1) %arr, i16 %N) {
10141014
; The element size of the array matches the element size of the pointer
10151015
define i64 @test_gep_bitcast_array_same_size_element(ptr %arr, i64 %N) {
10161016
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element(
1017-
; CHECK-NEXT: [[V:%.*]] = shl i64 [[N:%.*]], 3
1018-
; CHECK-NEXT: [[T:%.*]] = getelementptr i64, ptr [[ARR:%.*]], i64 [[V]]
1017+
; CHECK-NEXT: [[T_IDX:%.*]] = shl i64 [[N:%.*]], 6
1018+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr [[ARR:%.*]], i64 [[T_IDX]]
10191019
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[T]], align 4
10201020
; CHECK-NEXT: ret i64 [[X]]
10211021
;
@@ -1029,8 +1029,8 @@ define i64 @test_gep_bitcast_array_same_size_element(ptr %arr, i64 %N) {
10291029
define i64 @test_gep_bitcast_array_same_size_element_addrspacecast(ptr %arr, i64 %N) {
10301030
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast(
10311031
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr [[ARR:%.*]] to ptr addrspace(3)
1032-
; CHECK-NEXT: [[V:%.*]] = shl i64 [[N:%.*]], 3
1033-
; CHECK-NEXT: [[T:%.*]] = getelementptr i64, ptr addrspace(3) [[CAST]], i64 [[V]]
1032+
; CHECK-NEXT: [[T_IDX:%.*]] = shl i64 [[N:%.*]], 6
1033+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(3) [[CAST]], i64 [[T_IDX]]
10341034
; CHECK-NEXT: [[X:%.*]] = load i64, ptr addrspace(3) [[T]], align 4
10351035
; CHECK-NEXT: ret i64 [[X]]
10361036
;
@@ -1057,8 +1057,8 @@ define i8 @test_gep_bitcast_array_different_size_element(ptr %arr, i64 %N) {
10571057

10581058
define i64 @test_gep_bitcast_array_same_size_element_as1(ptr addrspace(1) %arr, i16 %N) {
10591059
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1(
1060-
; CHECK-NEXT: [[V:%.*]] = shl i16 [[N:%.*]], 3
1061-
; CHECK-NEXT: [[T:%.*]] = getelementptr i64, ptr addrspace(1) [[ARR:%.*]], i16 [[V]]
1060+
; CHECK-NEXT: [[T_IDX:%.*]] = shl i16 [[N:%.*]], 6
1061+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(1) [[ARR:%.*]], i16 [[T_IDX]]
10621062
; CHECK-NEXT: [[X:%.*]] = load i64, ptr addrspace(1) [[T]], align 4
10631063
; CHECK-NEXT: ret i64 [[X]]
10641064
;

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -312,14 +312,14 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
312312
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
313313
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
314314
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
315-
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
316-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP14]]
315+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
316+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX]]
317317
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
318-
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP15]], align 4
318+
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
319319
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
320320
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[DOTSPLAT]]
321-
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
322-
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
321+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
322+
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
323323
; CHECK: middle.block:
324324
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
325325
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -328,11 +328,11 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
328328
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
329329
; CHECK: for.body:
330330
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
331-
; CHECK-NEXT: [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1
332-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV_STRIDE2]]
333-
; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4
331+
; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[INDVARS_IV]], 3
332+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[ARRAYIDX_IDX]]
333+
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
334334
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
335-
; CHECK-NEXT: store float [[TMP17]], ptr [[ARRAYIDX2]], align 4
335+
; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4
336336
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
337337
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
338338
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,8 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
480480
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
481481
; CHECK: vector.body:
482482
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
483-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
484-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
483+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3
484+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
485485
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP4]], align 4
486486
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
487487
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
@@ -566,8 +566,8 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
566566
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
567567
; CHECK: vector.body:
568568
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
569-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
570-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
569+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3
570+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
571571
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4
572572
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
573573
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0

0 commit comments

Comments
 (0)