@@ -106,11 +106,12 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
- ; CHECK-NEXT: vle64.v v8, (a0)
+ ; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 2
@@ -125,27 +126,30 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
- ; CHECK-NEXT: vle64.v v8, (a0)
+ ; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 2
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 4
; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 6
@@ -160,59 +164,66 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0)
- ; CHECK-NEXT: vle64.v v8, (a0)
+ ; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 2
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_1)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 6, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 4
; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 6
; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 8
; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 10
; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 14, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v0, v16, 12
; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: vsaddu.vx v8, v8, a1
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: vsext.vf8 v16, v8
+ ; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vi v0, v16, 14