@@ -98,33 +98,45 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    vmax.vx v8, v8, zero
+; CHECK-NEXT:    li a0, 255
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i16>, ptr %x, align 16
-  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
-  %3 = trunc <4 x i16> %2 to <4 x i8>
-  store <4 x i8> %3, ptr %y, align 8
+  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> zeroinitializer)
+  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+  %4 = trunc <4 x i16> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    li a0, 255
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vmax.vx v8, v8, zero
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i16>, ptr %x, align 16
-  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
-  %3 = trunc <4 x i16> %2 to <4 x i8>
-  store <4 x i8> %3, ptr %y, align 8
+  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> zeroinitializer)
+  %4 = trunc <4 x i16> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
   ret void
 }
 
@@ -217,33 +229,49 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
   ret void
 }
 
-define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
-  %1 = load <4 x i32>, ptr %x, align 32
-  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
-  %3 = trunc <4 x i32> %2 to <4 x i16>
-  store <4 x i16> %3, ptr %y, align 16
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+  %4 = trunc <4 x i32> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 8
   ret void
 }
 
-define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 50
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
-  %1 = load <4 x i32>, ptr %x, align 32
-  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
-  %3 = trunc <4 x i32> %2 to <4 x i16>
-  store <4 x i16> %3, ptr %y, align 16
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 50, i32 50, i32 50, i32 50>)
+  %4 = trunc <4 x i32> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 8
   ret void
 }
 
@@ -339,32 +367,46 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 }
 
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vmax.vx v8, v8, zero
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
-  %1 = load <4 x i64>, ptr %x, align 64
-  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-  %3 = trunc <4 x i64> %2 to <4 x i32>
-  store <4 x i32> %3, ptr %y, align 32
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
+  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+  %4 = trunc <4 x i64> %3 to <4 x i32>
+  store <4 x i32> %4, ptr %y, align 8
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vmax.vx v8, v8, zero
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
-  %1 = load <4 x i64>, ptr %x, align 64
-  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-  %3 = trunc <4 x i64> %2 to <4 x i32>
-  store <4 x i32> %3, ptr %y, align 32
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
+  %4 = trunc <4 x i64> %3 to <4 x i32>
+  store <4 x i32> %4, ptr %y, align 8
   ret void
 }
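
Note on the repeated FIXME: the new clamp tests currently lower to an explicit vmax/vmin pair followed by a plain narrowing shift (vnsrl.wi), but vnclipu already saturates values above the destination type's maximum while narrowing, so only the signed clamp against the lower bound would remain. A rough sketch of the suggested sequence for the u8u16 case, assuming vnclipu.wi with shift amount 0 provides the unsigned saturation (this is the lowering the FIXME asks for, not output the compiler produces today):

    vsetivli   zero, 4, e16, mf2, ta, ma    # operate on the <4 x i16> source
    vle16.v    v8, (a0)                     # load the 16-bit elements
    vmax.vx    v8, v8, zero                 # signed max with 0 clamps negatives
    vsetvli    zero, zero, e8, mf4, ta, ma  # switch vtype for the narrowing op
    vnclipu.wi v8, v8, 0                    # narrow, saturating above 255
    vse8.v     v8, (a1)                     # store the <4 x i8> result

This drops the vmin and the scalar constant materialization (li a0, 255); the wider cases would analogously drop the lui/addi or li/srli sequences that build 65535 and 4294967295.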