@@ -124,42 +124,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32: # %bb.0:
 ; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 8
 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
 ; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 17
+; ZVFH32-NEXT: srli a1, a1, 17
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 17
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 2
+; ZVFH32-NEXT: or a1, a1, a3
 ; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
 ; ZVFH32-NEXT: ret
 ;
 ; ZVFH64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFH64: # %bb.0:
 ; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 8
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 49
+; ZVFH64-NEXT: vmv.x.s a2, v8
 ; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 49
+; ZVFH64-NEXT: slli a2, a2, 49
 ; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 34
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
 ; ZVFH64-NEXT: slli a2, a1, 19
 ; ZVFH64-NEXT: srli a2, a2, 51
 ; ZVFH64-NEXT: sw a1, 0(a0)
@@ -170,42 +168,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32: # %bb.0:
 ; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 8
 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 17
+; ZVFHMIN32-NEXT: srli a1, a1, 17
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 17
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 2
+; ZVFHMIN32-NEXT: or a1, a1, a3
 ; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
 ; ZVFHMIN32-NEXT: ret
 ;
 ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFHMIN64: # %bb.0:
 ; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 8
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 49
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
 ; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 49
+; ZVFHMIN64-NEXT: slli a2, a2, 49
 ; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 34
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
 ; ZVFHMIN64-NEXT: slli a2, a1, 19
 ; ZVFHMIN64-NEXT: srli a2, a2, 51
 ; ZVFHMIN64-NEXT: sw a1, 0(a0)
@@ -221,42 +217,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32: # %bb.0:
 ; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 16
 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
 ; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 16
+; ZVFH32-NEXT: srli a1, a1, 16
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 16
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 1
+; ZVFH32-NEXT: or a1, a1, a3
 ; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
 ; ZVFH32-NEXT: ret
 ;
 ; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFH64: # %bb.0:
 ; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 16
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 48
+; ZVFH64-NEXT: vmv.x.s a2, v8
 ; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 48
+; ZVFH64-NEXT: slli a2, a2, 48
 ; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 33
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
 ; ZVFH64-NEXT: slli a2, a1, 19
 ; ZVFH64-NEXT: srli a2, a2, 51
 ; ZVFH64-NEXT: sw a1, 0(a0)
@@ -267,42 +261,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32: # %bb.0:
 ; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 16
 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 16
+; ZVFHMIN32-NEXT: srli a1, a1, 16
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 16
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 1
+; ZVFHMIN32-NEXT: or a1, a1, a3
 ; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
 ; ZVFHMIN32-NEXT: ret
 ;
 ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFHMIN64: # %bb.0:
 ; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 16
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 48
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
 ; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 48
+; ZVFHMIN64-NEXT: slli a2, a2, 48
 ; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 33
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
 ; ZVFHMIN64-NEXT: slli a2, a1, 19
 ; ZVFHMIN64-NEXT: srli a2, a2, 51
 ; ZVFHMIN64-NEXT: sw a1, 0(a0)