Commit e06e680
[RISCV] Replace duplicate trunc-sat-clip tests with more interesting tests. NFC (#93737)
For each pair of types, we had 3 identical tests using umin with the unsigned max value. This patch replaces two of them with smin+smax cases that can be implemented with a signed vmax followed by a vnclipu.
1 parent 6e7b45c commit e06e680
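
The clamp pattern under test is smax(x, 0), then smin(x, UNSIGNED_MAX), then trunc. A minimal sketch of the lowering the FIXME comments below ask for, shown for the u8u16 case; this is an illustration under the assumption that vnclipu.wi performs a narrowing right shift with unsigned saturation to the destination element width (so a shift amount of 0 clips to [0, 255] at e8), not the codegen the tests currently produce:

    # Sketch only: the desired codegen for clamp <4 x i16> to [0, 255] + narrow.
    vsetivli   zero, 4, e16, mf2, ta, ma
    vle16.v    v8, (a0)              # load the i16 source
    vmax.vx    v8, v8, zero          # signed lower clamp at 0
    vsetvli    zero, zero, e8, mf4, ta, ma
    vnclipu.wi v8, v8, 0             # unsigned saturating narrow; handles the 255 bound
    vse8.v     v8, (a1)              # store the i8 result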

File tree: 2 files changed, +160 -76 lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll

Lines changed: 80 additions & 38 deletions
@@ -98,33 +98,45 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
 ; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
   %1 = load <4 x i16>, ptr %x, align 16
-  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
-  %3 = trunc <4 x i16> %2 to <4 x i8>
-  store <4 x i8> %3, ptr %y, align 8
+  %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> zeroinitializer)
+  %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+  %4 = trunc <4 x i16> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
 ; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
   %1 = load <4 x i16>, ptr %x, align 16
-  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
-  %3 = trunc <4 x i16> %2 to <4 x i8>
-  store <4 x i8> %3, ptr %y, align 8
+  %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+  %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> zeroinitializer)
+  %4 = trunc <4 x i16> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
   ret void
 }
 
@@ -217,33 +229,49 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
   ret void
 }
 
-define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
 ; CHECK-NEXT: vse16.v v8, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <4 x i32>, ptr %x, align 32
-  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
-  %3 = trunc <4 x i32> %2 to <4 x i16>
-  store <4 x i16> %3, ptr %y, align 16
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+  %4 = trunc <4 x i32> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 8
   ret void
 }
 
-define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
 ; CHECK-NEXT: vse16.v v8, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <4 x i32>, ptr %x, align 32
-  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
-  %3 = trunc <4 x i32> %2 to <4 x i16>
-  store <4 x i16> %3, ptr %y, align 16
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 50, i32 50, i32 50, i32 50>)
+  %4 = trunc <4 x i32> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 8
   ret void
 }
 
@@ -339,32 +367,46 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 }
 
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
 ; CHECK-NEXT: vse32.v v10, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <4 x i64>, ptr %x, align 64
-  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-  %3 = trunc <4 x i64> %2 to <4 x i32>
-  store <4 x i32> %3, ptr %y, align 32
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
+  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+  %4 = trunc <4 x i64> %3 to <4 x i32>
+  store <4 x i32> %4, ptr %y, align 8
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
 ; CHECK-NEXT: vse32.v v10, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <4 x i64>, ptr %x, align 64
-  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-  %3 = trunc <4 x i64> %2 to <4 x i32>
-  store <4 x i32> %3, ptr %y, align 32
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
+  %4 = trunc <4 x i64> %3 to <4 x i32>
+  store <4 x i32> %4, ptr %y, align 8
   ret void
 }
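
Note that the u16u32 tests above use non-zero lower bounds (1 and 50) where the other pairs use 0. The vmax-plus-vnclipu lowering still applies: a signed vmax with any non-negative bound already guarantees the value is non-negative, and vnclipu supplies the upper saturation. A sketch for the bound-50 case, under the same vnclipu assumption as above (register choices are illustrative):

    vsetivli   zero, 4, e32, m1, ta, ma
    vle32.v    v8, (a0)
    li         a2, 50                # lower clamp bound
    vmax.vx    v8, v8, a2            # signed clamp below at 50
    vsetvli    zero, zero, e16, mf2, ta, ma
    vnclipu.wi v8, v8, 0             # saturating narrow clamps above at 65535
    vse16.v    v8, (a1)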

llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll

Lines changed: 80 additions & 38 deletions
@@ -98,33 +98,45 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
 ; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
-  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
-  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
-  store <vscale x 4 x i8> %3, ptr %y, align 8
+  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 0))
+  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 255))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
 ; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
-  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
-  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
-  store <vscale x 4 x i8> %3, ptr %y, align 8
+  %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
+  %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 0))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
   ret void
 }
 
@@ -217,33 +229,49 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
   ret void
 }
 
-define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
 ; CHECK-NEXT: vs1r.v v10, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <vscale x 4 x i32>, ptr %x, align 32
-  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
-  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
-  store <vscale x 4 x i16> %3, ptr %y, align 16
+  %1 = load <vscale x 4 x i32>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 1))
+  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 65535))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 8
   ret void
 }
 
-define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
 ; CHECK-NEXT: vs1r.v v10, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <vscale x 4 x i32>, ptr %x, align 32
-  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
-  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
-  store <vscale x 4 x i16> %3, ptr %y, align 16
+  %1 = load <vscale x 4 x i32>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
+  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 50))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 8
   ret void
 }
 
@@ -339,32 +367,46 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 }
 
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnclipu.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
 ; CHECK-NEXT: vs2r.v v12, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <vscale x 4 x i64>, ptr %x, align 64
-  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
-  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
-  store <vscale x 4 x i32> %3, ptr %y, align 32
+  %1 = load <vscale x 4 x i64>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
+  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 4294967295))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 8
   ret void
 }
 
+; FIXME: This can be a signed vmax followed by vnclipu.
 define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnclipu.wi v12, v8, 0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
 ; CHECK-NEXT: vs2r.v v12, (a1)
 ; CHECK-NEXT: ret
-  %1 = load <vscale x 4 x i64>, ptr %x, align 64
-  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
-  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
-  store <vscale x 4 x i32> %3, ptr %y, align 32
+  %1 = load <vscale x 4 x i64>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
+  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 8
   ret void
 }
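
One observation the maxmin/minmax pairs rely on: when the lower bound is no greater than the upper bound, smin(smax(x, lo), hi) and smax(smin(x, hi), lo) compute the same clamp, so both operand orders should be able to share the lowering sketched above. A small standalone IR illustration of the u8u16 pair (the function names are hypothetical, not part of the tests):

    declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
    declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)

    define <4 x i8> @clamp_maxmin(<4 x i16> %x) {
      %a = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %x, <4 x i16> zeroinitializer)
      %b = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
      %t = trunc <4 x i16> %b to <4 x i8>
      ret <4 x i8> %t
    }

    ; Identical results to @clamp_maxmin because 0 <= 255.
    define <4 x i8> @clamp_minmax(<4 x i16> %x) {
      %a = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %x, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
      %b = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer)
      %t = trunc <4 x i16> %b to <4 x i8>
      ret <4 x i8> %t
    }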
