Skip to content

Commit 7b17239

Browse files
committed
[InstCombine] Canonicalize non-i8 gep of mul to i8
This is a small canonicalization of `gep i32, p, (mul x, C)` to `gep i8, p, (mul x, C*4)`, so that the two constant multiplications can be folded into a single mul, and so that we take a small step towards canonicalizing more geps to i8.
1 parent efa8463 commit 7b17239

16 files changed

+631
-488
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,9 +2787,16 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
27872787
GEP.getNoWrapFlags()));
27882788
}
27892789

2790-
// Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
2791-
// intrinsic. This has better support in BasicAA.
2792-
if (GEPEltType->isScalableTy()) {
2790+
// Canonicalize
2791+
// - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
2792+
// This has better support in BasicAA.
2793+
// - gep i32, p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
2794+
// multiplies together.
2795+
if (GEPEltType->isScalableTy() ||
2796+
(!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
2797+
match(GEP.getOperand(1),
2798+
m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
2799+
m_Shl(m_Value(), m_ConstantInt()))))) {
27932800
Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
27942801
return replaceInstUsesWith(
27952802
GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
define ptr @mul4(ptr %p, i64 %x) {
5+
; CHECK-LABEL: define ptr @mul4(
6+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[GEP_IDX:%.*]] = shl i64 [[X]], 4
9+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[GEP_IDX]]
10+
; CHECK-NEXT: ret ptr [[GEP]]
11+
;
12+
entry:
13+
%mul = mul i64 %x, 4
14+
%gep = getelementptr inbounds i32, ptr %p, i64 %mul
15+
ret ptr %gep
16+
}
17+
18+
define ptr @mul5(ptr %p, i64 %x) {
19+
; CHECK-LABEL: define ptr @mul5(
20+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
21+
; CHECK-NEXT: entry:
22+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 20
23+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[GEP_IDX]]
24+
; CHECK-NEXT: ret ptr [[GEP]]
25+
;
26+
entry:
27+
%mul = mul i64 %x, 5
28+
%gep = getelementptr inbounds i32, ptr %p, i64 %mul
29+
ret ptr %gep
30+
}
31+
32+
define ptr @noinbounds(ptr %p, i64 %x) {
33+
; CHECK-LABEL: define ptr @noinbounds(
34+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
35+
; CHECK-NEXT: entry:
36+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 20
37+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[GEP_IDX]]
38+
; CHECK-NEXT: ret ptr [[GEP]]
39+
;
40+
entry:
41+
%mul = mul i64 %x, 5
42+
%gep = getelementptr i32, ptr %p, i64 %mul
43+
ret ptr %gep
44+
}
45+
46+
define ptr @usemul(ptr %p, i64 %x) {
47+
; CHECK-LABEL: define ptr @usemul(
48+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
49+
; CHECK-NEXT: entry:
50+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[X]], 5
51+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 20
52+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[GEP_IDX]]
53+
; CHECK-NEXT: call void @use(i64 [[MUL]])
54+
; CHECK-NEXT: ret ptr [[GEP]]
55+
;
56+
entry:
57+
%mul = mul i64 %x, 5
58+
%gep = getelementptr inbounds i32, ptr %p, i64 %mul
59+
call void @use(i64 %mul)
60+
ret ptr %gep
61+
}
62+
63+
define void @multiple(ptr %p, i64 %x) {
64+
; CHECK-LABEL: define void @multiple(
65+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
66+
; CHECK-NEXT: entry:
67+
; CHECK-NEXT: [[MUL21:%.*]] = mul i64 [[X]], 20
68+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL21]]
69+
; CHECK-NEXT: [[MUL20:%.*]] = mul i64 [[X]], 20
70+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL20]]
71+
; CHECK-NEXT: call void @use2(ptr [[GEP3]], ptr [[GEP2]])
72+
; CHECK-NEXT: ret void
73+
;
74+
entry:
75+
%mul5 = mul i64 %x, 5
76+
%gep1 = getelementptr inbounds i32, ptr %p, i64 %mul5
77+
%mul20 = mul i64 %x, 20
78+
%gep2 = getelementptr inbounds i8, ptr %p, i64 %mul20
79+
call void @use2(ptr %gep1, ptr %gep2)
80+
ret void
81+
}
82+
83+
define void @multiplestore(ptr %p, i64 %x) {
84+
; CHECK-LABEL: define void @multiplestore(
85+
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) {
86+
; CHECK-NEXT: entry:
87+
; CHECK-NEXT: [[MUL20:%.*]] = mul i64 [[X]], 20
88+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL20]]
89+
; CHECK-NEXT: [[MUL21:%.*]] = mul i64 [[X]], 20
90+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[MUL21]]
91+
; CHECK-NEXT: store i32 0, ptr [[GEP2]], align 4
92+
; CHECK-NEXT: store i32 1, ptr [[GEP3]], align 4
93+
; CHECK-NEXT: ret void
94+
;
95+
entry:
96+
%mul5 = mul i64 %x, 5
97+
%gep1 = getelementptr inbounds i32, ptr %p, i64 %mul5
98+
%mul20 = mul i64 %x, 20
99+
%gep2 = getelementptr inbounds i8, ptr %p, i64 %mul20
100+
store i32 0, ptr %gep1
101+
store i32 1, ptr %gep2
102+
ret void
103+
}
104+
105+
declare void @use(i64)
106+
declare void @use2(ptr, ptr)

llvm/test/Transforms/InstCombine/getelementptr.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,8 +1014,8 @@ define i8 @test_gep_bitcast_as1(ptr addrspace(1) %arr, i16 %N) {
10141014
; The element size of the array matches the element size of the pointer
10151015
define i64 @test_gep_bitcast_array_same_size_element(ptr %arr, i64 %N) {
10161016
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element(
1017-
; CHECK-NEXT: [[V:%.*]] = shl i64 [[N:%.*]], 3
1018-
; CHECK-NEXT: [[T:%.*]] = getelementptr i64, ptr [[ARR:%.*]], i64 [[V]]
1017+
; CHECK-NEXT: [[T_IDX:%.*]] = shl i64 [[N:%.*]], 6
1018+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr [[ARR:%.*]], i64 [[T_IDX]]
10191019
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[T]], align 4
10201020
; CHECK-NEXT: ret i64 [[X]]
10211021
;
@@ -1029,8 +1029,8 @@ define i64 @test_gep_bitcast_array_same_size_element(ptr %arr, i64 %N) {
10291029
define i64 @test_gep_bitcast_array_same_size_element_addrspacecast(ptr %arr, i64 %N) {
10301030
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast(
10311031
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr [[ARR:%.*]] to ptr addrspace(3)
1032-
; CHECK-NEXT: [[V:%.*]] = shl i64 [[N:%.*]], 3
1033-
; CHECK-NEXT: [[T:%.*]] = getelementptr i64, ptr addrspace(3) [[CAST]], i64 [[V]]
1032+
; CHECK-NEXT: [[T_IDX:%.*]] = shl i64 [[N:%.*]], 6
1033+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(3) [[CAST]], i64 [[T_IDX]]
10341034
; CHECK-NEXT: [[X:%.*]] = load i64, ptr addrspace(3) [[T]], align 4
10351035
; CHECK-NEXT: ret i64 [[X]]
10361036
;
@@ -1057,8 +1057,8 @@ define i8 @test_gep_bitcast_array_different_size_element(ptr %arr, i64 %N) {
10571057

10581058
define i64 @test_gep_bitcast_array_same_size_element_as1(ptr addrspace(1) %arr, i16 %N) {
10591059
; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1(
1060-
; CHECK-NEXT: [[V:%.*]] = shl i16 [[N:%.*]], 3
1061-
; CHECK-NEXT: [[T:%.*]] = getelementptr i64, ptr addrspace(1) [[ARR:%.*]], i16 [[V]]
1060+
; CHECK-NEXT: [[T_IDX:%.*]] = shl i16 [[N:%.*]], 6
1061+
; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(1) [[ARR:%.*]], i16 [[T_IDX]]
10621062
; CHECK-NEXT: [[X:%.*]] = load i64, ptr addrspace(1) [[T]], align 4
10631063
; CHECK-NEXT: ret i64 [[X]]
10641064
;

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -312,14 +312,14 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
312312
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
313313
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
314314
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
315-
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
316-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP14]]
315+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
316+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX]]
317317
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
318-
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP15]], align 4
318+
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
319319
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
320320
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[DOTSPLAT]]
321-
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
322-
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
321+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
322+
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
323323
; CHECK: middle.block:
324324
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
325325
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -328,11 +328,11 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
328328
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
329329
; CHECK: for.body:
330330
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
331-
; CHECK-NEXT: [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1
332-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV_STRIDE2]]
333-
; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4
331+
; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[INDVARS_IV]], 3
332+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[ARRAYIDX_IDX]]
333+
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
334334
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
335-
; CHECK-NEXT: store float [[TMP17]], ptr [[ARRAYIDX2]], align 4
335+
; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4
336336
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
337337
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
338338
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -480,18 +480,18 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
480480
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
481481
; CHECK: vector.body:
482482
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
483-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
484-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
483+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = and i64 [[INDEX]], 9223372036854775804
484+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3
485+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
485486
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP4]], align 4
486487
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
487488
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
488489
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <vscale x 4 x i32> [[TMP5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
489-
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804
490-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]]
491-
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP6]], ptr [[TMP8]], align 4
490+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
491+
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP6]], ptr [[TMP7]], align 4
492492
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
493-
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
494-
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
493+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
494+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
495495
; CHECK: middle.block:
496496
; CHECK-NEXT: br label [[SCALAR_PH]]
497497
; CHECK: scalar.ph:
@@ -566,18 +566,18 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
566566
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
567567
; CHECK: vector.body:
568568
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
569-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
570-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
569+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = and i64 [[INDEX]], 9223372036854775804
570+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3
571+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
571572
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4
572573
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
573574
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
574575
; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[TMP13]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
575-
; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[INDEX]], 9223372036854775804
576-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP15]]
577-
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4
576+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
577+
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP15]], align 4
578578
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
579-
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
580-
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
579+
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
580+
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
581581
; CHECK: middle.block:
582582
; CHECK-NEXT: br label [[SCALAR_PH]]
583583
; CHECK: scalar.ph:
@@ -715,7 +715,8 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n
715715
; CHECK: vector.body:
716716
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
717717
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
718-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
718+
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3
719+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
719720
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP2]], align 4
720721
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
721722
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0

0 commit comments

Comments
 (0)