@@ -10,11 +10,11 @@ define i32 @dotp_z_s(ptr %a, ptr %b) #0 {
10
10
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
11
11
; CHECK-NEXT: entry:
12
12
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
13
- ; CHECK-NEXT: [[TMP13 :%.*]] = mul i64 [[TMP0]], 16
13
+ ; CHECK-NEXT: [[TMP1 :%.*]] = mul i64 [[TMP0]], 16
14
14
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
15
15
; CHECK: vector.ph:
16
- ; CHECK-NEXT: [[TMP14 :%.*]] = call i64 @llvm.vscale.i64()
17
- ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP14 ]], 16
16
+ ; CHECK-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
17
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2 ]], 16
18
18
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
19
19
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
20
20
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
@@ -24,21 +24,21 @@ define i32 @dotp_z_s(ptr %a, ptr %b) #0 {
24
24
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25
25
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
26
26
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
27
- ; CHECK-NEXT: [[TMP1 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
28
- ; CHECK-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1 ]], i32 0
27
+ ; CHECK-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
28
+ ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
29
29
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
30
30
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
31
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP1 ]], i64 [[TMP9]]
32
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP2 ]], align 1
31
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP9]]
32
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
33
33
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
34
34
; CHECK-NEXT: [[TMP11:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
35
35
; CHECK-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
36
- ; CHECK-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
37
- ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
36
+ ; CHECK-NEXT: [[TMP13 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
37
+ ; CHECK-NEXT: [[TMP14 :%.*]] = getelementptr i8, ptr [[TMP13 ]], i32 0
38
38
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
39
39
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
40
- ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP16]]
41
- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
40
+ ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13 ]], i64 [[TMP16]]
41
+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14 ]], align 1
42
42
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP17]], align 1
43
43
; CHECK-NEXT: [[TMP18:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32>
44
44
; CHECK-NEXT: [[TMP19:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32>
@@ -60,48 +60,48 @@ define i32 @dotp_z_s(ptr %a, ptr %b) #0 {
60
60
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
61
61
; CHECK-NOI8MM-NEXT: entry:
62
62
; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
63
- ; CHECK-NOI8MM-NEXT: [[TMP13 :%.*]] = mul i64 [[TMP0]], 16
63
+ ; CHECK-NOI8MM-NEXT: [[TMP1 :%.*]] = mul i64 [[TMP0]], 16
64
64
; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
65
65
; CHECK-NOI8MM: vector.ph:
66
- ; CHECK-NOI8MM-NEXT: [[TMP14 :%.*]] = call i64 @llvm.vscale.i64()
67
- ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = mul i64 [[TMP14 ]], 16
66
+ ; CHECK-NOI8MM-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
67
+ ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2 ]], 16
68
68
; CHECK-NOI8MM-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
69
69
; CHECK-NOI8MM-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
70
70
; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
71
71
; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
72
72
; CHECK-NOI8MM-NEXT: br label [[VECTOR_BODY:%.*]]
73
73
; CHECK-NOI8MM: vector.body:
74
74
; CHECK-NOI8MM-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
75
- ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE :%.*]], [[VECTOR_BODY]] ]
76
- ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5 :%.*]], [[VECTOR_BODY]] ]
77
- ; CHECK-NOI8MM-NEXT: [[TMP1 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
78
- ; CHECK-NOI8MM-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1 ]], i32 0
75
+ ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22 :%.*]], [[VECTOR_BODY]] ]
76
+ ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23 :%.*]], [[VECTOR_BODY]] ]
77
+ ; CHECK-NOI8MM-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
78
+ ; CHECK-NOI8MM-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
79
79
; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
80
80
; CHECK-NOI8MM-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
81
- ; CHECK-NOI8MM-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP1 ]], i64 [[TMP9]]
82
- ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP2 ]], align 1
81
+ ; CHECK-NOI8MM-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP9]]
82
+ ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
83
83
; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
84
84
; CHECK-NOI8MM-NEXT: [[TMP11:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
85
85
; CHECK-NOI8MM-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
86
- ; CHECK-NOI8MM-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
87
- ; CHECK-NOI8MM-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
86
+ ; CHECK-NOI8MM-NEXT: [[TMP13 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
87
+ ; CHECK-NOI8MM-NEXT: [[TMP14 :%.*]] = getelementptr i8, ptr [[TMP13 ]], i32 0
88
88
; CHECK-NOI8MM-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
89
89
; CHECK-NOI8MM-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
90
- ; CHECK-NOI8MM-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP16]]
91
- ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
90
+ ; CHECK-NOI8MM-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13 ]], i64 [[TMP16]]
91
+ ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14 ]], align 1
92
92
; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP17]], align 1
93
93
; CHECK-NOI8MM-NEXT: [[TMP18:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32>
94
94
; CHECK-NOI8MM-NEXT: [[TMP19:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32>
95
95
; CHECK-NOI8MM-NEXT: [[TMP20:%.*]] = mul <vscale x 8 x i32> [[TMP18]], [[TMP11]]
96
96
; CHECK-NOI8MM-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP12]]
97
- ; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE ]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI ]], <vscale x 8 x i32> [[TMP20]])
98
- ; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5 ]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1 ]], <vscale x 8 x i32> [[TMP21]])
97
+ ; CHECK-NOI8MM-NEXT: [[TMP22 ]] = add <vscale x 8 x i32> [[TMP20 ]], [[VEC_PHI]]
98
+ ; CHECK-NOI8MM-NEXT: [[TMP23 ]] = add <vscale x 8 x i32> [[TMP21 ]], [[VEC_PHI1]]
99
99
; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
100
100
; CHECK-NOI8MM-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
101
101
; CHECK-NOI8MM-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
102
102
; CHECK-NOI8MM: middle.block:
103
- ; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <vscale x 2 x i32> [[PARTIAL_REDUCE5 ]], [[PARTIAL_REDUCE ]]
104
- ; CHECK-NOI8MM-NEXT: [[TMP23 :%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32 (<vscale x 2 x i32> [[BIN_RDX]])
103
+ ; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i32> [[TMP23 ]], [[TMP22 ]]
104
+ ; CHECK-NOI8MM-NEXT: [[TMP25 :%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32 (<vscale x 8 x i32> [[BIN_RDX]])
105
105
; CHECK-NOI8MM-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
106
106
; CHECK-NOI8MM-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
107
107
; CHECK-NOI8MM: scalar.ph:
@@ -133,11 +133,11 @@ define i32 @dotp_s_z(ptr %a, ptr %b) #0 {
133
133
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
134
134
; CHECK-NEXT: entry:
135
135
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
136
- ; CHECK-NEXT: [[TMP13 :%.*]] = mul i64 [[TMP0]], 16
136
+ ; CHECK-NEXT: [[TMP1 :%.*]] = mul i64 [[TMP0]], 16
137
137
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
138
138
; CHECK: vector.ph:
139
- ; CHECK-NEXT: [[TMP14 :%.*]] = call i64 @llvm.vscale.i64()
140
- ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP14 ]], 16
139
+ ; CHECK-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
140
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2 ]], 16
141
141
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
142
142
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
143
143
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
@@ -147,21 +147,21 @@ define i32 @dotp_s_z(ptr %a, ptr %b) #0 {
147
147
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
148
148
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
149
149
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
150
- ; CHECK-NEXT: [[TMP1 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
151
- ; CHECK-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1 ]], i32 0
150
+ ; CHECK-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
151
+ ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
152
152
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
153
153
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
154
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP1 ]], i64 [[TMP9]]
155
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP2 ]], align 1
154
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP9]]
155
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
156
156
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
157
157
; CHECK-NEXT: [[TMP11:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
158
158
; CHECK-NEXT: [[TMP12:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
159
- ; CHECK-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
160
- ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
159
+ ; CHECK-NEXT: [[TMP13 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
160
+ ; CHECK-NEXT: [[TMP14 :%.*]] = getelementptr i8, ptr [[TMP13 ]], i32 0
161
161
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
162
162
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
163
- ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP16]]
164
- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
163
+ ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13 ]], i64 [[TMP16]]
164
+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14 ]], align 1
165
165
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP17]], align 1
166
166
; CHECK-NEXT: [[TMP18:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32>
167
167
; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32>
@@ -183,48 +183,48 @@ define i32 @dotp_s_z(ptr %a, ptr %b) #0 {
183
183
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
184
184
; CHECK-NOI8MM-NEXT: entry:
185
185
; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
186
- ; CHECK-NOI8MM-NEXT: [[TMP13 :%.*]] = mul i64 [[TMP0]], 16
186
+ ; CHECK-NOI8MM-NEXT: [[TMP1 :%.*]] = mul i64 [[TMP0]], 16
187
187
; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
188
188
; CHECK-NOI8MM: vector.ph:
189
- ; CHECK-NOI8MM-NEXT: [[TMP14 :%.*]] = call i64 @llvm.vscale.i64()
190
- ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = mul i64 [[TMP14 ]], 16
189
+ ; CHECK-NOI8MM-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
190
+ ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2 ]], 16
191
191
; CHECK-NOI8MM-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
192
192
; CHECK-NOI8MM-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
193
193
; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
194
194
; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
195
195
; CHECK-NOI8MM-NEXT: br label [[VECTOR_BODY:%.*]]
196
196
; CHECK-NOI8MM: vector.body:
197
197
; CHECK-NOI8MM-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
198
- ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE :%.*]], [[VECTOR_BODY]] ]
199
- ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5 :%.*]], [[VECTOR_BODY]] ]
200
- ; CHECK-NOI8MM-NEXT: [[TMP1 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
201
- ; CHECK-NOI8MM-NEXT: [[TMP2 :%.*]] = getelementptr i8, ptr [[TMP1 ]], i32 0
198
+ ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22 :%.*]], [[VECTOR_BODY]] ]
199
+ ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23 :%.*]], [[VECTOR_BODY]] ]
200
+ ; CHECK-NOI8MM-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
201
+ ; CHECK-NOI8MM-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
202
202
; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
203
203
; CHECK-NOI8MM-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
204
- ; CHECK-NOI8MM-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP1 ]], i64 [[TMP9]]
205
- ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP2 ]], align 1
204
+ ; CHECK-NOI8MM-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP9]]
205
+ ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
206
206
; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP10]], align 1
207
207
; CHECK-NOI8MM-NEXT: [[TMP11:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
208
208
; CHECK-NOI8MM-NEXT: [[TMP12:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
209
- ; CHECK-NOI8MM-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
210
- ; CHECK-NOI8MM-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP6 ]], i32 0
209
+ ; CHECK-NOI8MM-NEXT: [[TMP13 :%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
210
+ ; CHECK-NOI8MM-NEXT: [[TMP14 :%.*]] = getelementptr i8, ptr [[TMP13 ]], i32 0
211
211
; CHECK-NOI8MM-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
212
212
; CHECK-NOI8MM-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
213
- ; CHECK-NOI8MM-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP6 ]], i64 [[TMP16]]
214
- ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7 ]], align 1
213
+ ; CHECK-NOI8MM-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13 ]], i64 [[TMP16]]
214
+ ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14 ]], align 1
215
215
; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP17]], align 1
216
216
; CHECK-NOI8MM-NEXT: [[TMP18:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32>
217
217
; CHECK-NOI8MM-NEXT: [[TMP19:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32>
218
218
; CHECK-NOI8MM-NEXT: [[TMP20:%.*]] = mul <vscale x 8 x i32> [[TMP18]], [[TMP11]]
219
219
; CHECK-NOI8MM-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP12]]
220
- ; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE ]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI ]], <vscale x 8 x i32> [[TMP20]])
221
- ; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5 ]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1 ]], <vscale x 8 x i32> [[TMP21]])
220
+ ; CHECK-NOI8MM-NEXT: [[TMP22 ]] = add <vscale x 8 x i32> [[TMP20 ]], [[VEC_PHI]]
221
+ ; CHECK-NOI8MM-NEXT: [[TMP23 ]] = add <vscale x 8 x i32> [[TMP21 ]], [[VEC_PHI1]]
222
222
; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
223
223
; CHECK-NOI8MM-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
224
224
; CHECK-NOI8MM-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
225
225
; CHECK-NOI8MM: middle.block:
226
- ; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <vscale x 2 x i32> [[PARTIAL_REDUCE5 ]], [[PARTIAL_REDUCE ]]
227
- ; CHECK-NOI8MM-NEXT: [[TMP23 :%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32 (<vscale x 2 x i32> [[BIN_RDX]])
226
+ ; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i32> [[TMP23 ]], [[TMP22 ]]
227
+ ; CHECK-NOI8MM-NEXT: [[TMP25 :%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32 (<vscale x 8 x i32> [[BIN_RDX]])
228
228
; CHECK-NOI8MM-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
229
229
; CHECK-NOI8MM-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
230
230
; CHECK-NOI8MM: scalar.ph:
0 commit comments