Skip to content

Commit 0afc884

Browse files
authored
[RISCV] Use vnclip for scalable vector saturating truncation. (#88648)
Similar to #75145, but for scalable vectors.

Specifically, this patch handles the following optimization case:

## Source Code

```
define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -128))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 127))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}
```

## Before this patch

[Compiler Explorer](https://godbolt.org/z/EKc9eGvo8)

```
trunc_sat_i8i16_maxmin:
        vl1re16.v       v8, (a0)
        li      a0, -128
        vsetvli a2, zero, e16, m1, ta, ma
        vmax.vx v8, v8, a0
        li      a0, 127
        vmin.vx v8, v8, a0
        vsetvli zero, zero, e8, mf2, ta, ma
        vnsrl.wi        v8, v8, 0
        vse8.v  v8, (a1)
        ret
```

## After this patch

```
trunc_sat_i8i16_maxmin:
        vsetivli        zero, 4, e8, mf4, ta, ma
        vle16.v v8, (a0)
        vnclip.wi       v8, v8, 0
        vse8.v  v8, (a1)
        ret
```
1 parent 808d794 commit 0afc884

File tree

4 files changed

+455
-61
lines changed

4 files changed

+455
-61
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1166,6 +1166,47 @@ defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
11661166
defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
11671167
defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
11681168

1169+
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
1170+
multiclass VPatTruncSatClipSDNode<VTypeInfo vti, VTypeInfo wti> {
1171+
defvar sew = vti.SEW;
1172+
defvar uminval = !sub(!shl(1, sew), 1);
1173+
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
1174+
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
1175+
1176+
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
1177+
GetVTypePredicates<wti>.Predicates) in {
1178+
def : Pat<(vti.Vector (riscv_trunc_vector_vl
1179+
(wti.Vector (smin
1180+
(wti.Vector (smax (wti.Vector wti.RegClass:$rs1),
1181+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))))),
1182+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))))),
1183+
(vti.Mask V0), VLOpFrag)),
1184+
(!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
1185+
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
1186+
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
1187+
1188+
def : Pat<(vti.Vector (riscv_trunc_vector_vl
1189+
(wti.Vector (smax
1190+
(wti.Vector (smin (wti.Vector wti.RegClass:$rs1),
1191+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))))),
1192+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))))),
1193+
(vti.Mask V0), VLOpFrag)),
1194+
(!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
1195+
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
1196+
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
1197+
1198+
def : Pat<(vti.Vector (riscv_trunc_vector_vl
1199+
(wti.Vector (umin (wti.Vector wti.RegClass:$rs1),
1200+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))))), (vti.Mask V0), VLOpFrag)),
1201+
(!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
1202+
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
1203+
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
1204+
}
1205+
}
1206+
1207+
foreach vtiToWti = AllWidenableIntVectors in
1208+
defm : VPatTruncSatClipSDNode<vtiToWti.Vti, vtiToWti.Wti>;
1209+
11691210
// 15. Vector Mask Instructions
11701211

11711212
// 15.1. Vector Mask-Register Logical Instructions

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 29 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -2373,30 +2373,41 @@ defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
23732373
defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
23742374

23752375
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
2376-
class VPatTruncSatClipMaxMinBase<string inst,
2377-
VTypeInfo vti,
2378-
VTypeInfo wti,
2379-
SDPatternOperator op1,
2380-
int op1_value,
2381-
SDPatternOperator op2,
2382-
int op2_value> :
2383-
Pat<(vti.Vector (riscv_trunc_vector_vl
2384-
(wti.Vector (op1
2385-
(wti.Vector (op2
2376+
multiclass VPatTruncSatClipVL<VTypeInfo vti, VTypeInfo wti> {
2377+
defvar sew = vti.SEW;
2378+
defvar uminval = !sub(!shl(1, sew), 1);
2379+
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
2380+
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
2381+
2382+
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
2383+
GetVTypePredicates<wti>.Predicates) in {
2384+
def : Pat<(vti.Vector (riscv_trunc_vector_vl
2385+
(wti.Vector (riscv_smin_vl
2386+
(wti.Vector (riscv_smax_vl
23862387
(wti.Vector wti.RegClass:$rs1),
2387-
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
2388+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))),
23882389
(wti.Vector undef),(wti.Mask V0), VLOpFrag)),
2389-
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
2390+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))),
23902391
(wti.Vector undef), (wti.Mask V0), VLOpFrag)),
23912392
(vti.Mask V0), VLOpFrag)),
2392-
(!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
2393+
(!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
23932394
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
23942395
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
23952396

2396-
class VPatTruncSatClipUMin<VTypeInfo vti,
2397-
VTypeInfo wti,
2398-
int uminval> :
2399-
Pat<(vti.Vector (riscv_trunc_vector_vl
2397+
def : Pat<(vti.Vector (riscv_trunc_vector_vl
2398+
(wti.Vector (riscv_smax_vl
2399+
(wti.Vector (riscv_smin_vl
2400+
(wti.Vector wti.RegClass:$rs1),
2401+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))),
2402+
(wti.Vector undef),(wti.Mask V0), VLOpFrag)),
2403+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))),
2404+
(wti.Vector undef), (wti.Mask V0), VLOpFrag)),
2405+
(vti.Mask V0), VLOpFrag)),
2406+
(!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
2407+
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
2408+
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
2409+
2410+
def : Pat<(vti.Vector (riscv_trunc_vector_vl
24002411
(wti.Vector (riscv_umin_vl
24012412
(wti.Vector wti.RegClass:$rs1),
24022413
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
@@ -2405,30 +2416,11 @@ class VPatTruncSatClipUMin<VTypeInfo vti,
24052416
(!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
24062417
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
24072418
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
2408-
2409-
multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
2410-
SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
2411-
def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
2412-
def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
2413-
}
2414-
2415-
multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
2416-
defvar sew = vti.SEW;
2417-
defvar uminval = !sub(!shl(1, sew), 1);
2418-
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
2419-
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
2420-
2421-
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
2422-
GetVTypePredicates<wti>.Predicates) in {
2423-
defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
2424-
sminval, riscv_smax_vl, smaxval>;
2425-
def : VPatTruncSatClipUMin<vti, wti, uminval>;
24262419
}
2427-
24282420
}
24292421

24302422
foreach vtiToWti = AllWidenableIntVectors in
2431-
defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>;
2423+
defm : VPatTruncSatClipVL<vtiToWti.Vti, vtiToWti.Wti>;
24322424

24332425
// 13. Vector Floating-Point Instructions
24342426

llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll renamed to llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll

Lines changed: 15 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,8 @@ declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
88
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
99
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
1010

11-
declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
1211
declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
13-
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
1412
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
15-
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
1613
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
1714

1815
define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
@@ -110,10 +107,9 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
110107
; CHECK-NEXT: vse8.v v8, (a1)
111108
; CHECK-NEXT: ret
112109
%1 = load <4 x i16>, ptr %x, align 16
113-
%2 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %1, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
114-
%3 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
115-
%4 = trunc <4 x i16> %3 to <4 x i8>
116-
store <4 x i8> %4, ptr %y, align 8
110+
%2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
111+
%3 = trunc <4 x i16> %2 to <4 x i8>
112+
store <4 x i8> %3, ptr %y, align 8
117113
ret void
118114
}
119115

@@ -127,9 +123,8 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
127123
; CHECK-NEXT: ret
128124
%1 = load <4 x i16>, ptr %x, align 16
129125
%2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
130-
%3 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %2, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
131-
%4 = trunc <4 x i16> %3 to <4 x i8>
132-
store <4 x i8> %4, ptr %y, align 8
126+
%3 = trunc <4 x i16> %2 to <4 x i8>
127+
store <4 x i8> %3, ptr %y, align 8
133128
ret void
134129
}
135130

@@ -231,10 +226,9 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
231226
; CHECK-NEXT: vse16.v v8, (a1)
232227
; CHECK-NEXT: ret
233228
%1 = load <4 x i32>, ptr %x, align 32
234-
%2 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
235-
%3 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
236-
%4 = trunc <4 x i32> %3 to <4 x i16>
237-
store <4 x i16> %4, ptr %y, align 16
229+
%2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
230+
%3 = trunc <4 x i32> %2 to <4 x i16>
231+
store <4 x i16> %3, ptr %y, align 16
238232
ret void
239233
}
240234

@@ -248,9 +242,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
248242
; CHECK-NEXT: ret
249243
%1 = load <4 x i32>, ptr %x, align 32
250244
%2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
251-
%3 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
252-
%4 = trunc <4 x i32> %3 to <4 x i16>
253-
store <4 x i16> %4, ptr %y, align 16
245+
%3 = trunc <4 x i32> %2 to <4 x i16>
246+
store <4 x i16> %3, ptr %y, align 16
254247
ret void
255248
}
256249

@@ -355,10 +348,9 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
355348
; CHECK-NEXT: vse32.v v10, (a1)
356349
; CHECK-NEXT: ret
357350
%1 = load <4 x i64>, ptr %x, align 64
358-
%2 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %1, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
359-
%3 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
360-
%4 = trunc <4 x i64> %3 to <4 x i32>
361-
store <4 x i32> %4, ptr %y, align 32
351+
%2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
352+
%3 = trunc <4 x i64> %2 to <4 x i32>
353+
store <4 x i32> %3, ptr %y, align 32
362354
ret void
363355
}
364356

@@ -372,8 +364,7 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
372364
; CHECK-NEXT: ret
373365
%1 = load <4 x i64>, ptr %x, align 64
374366
%2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
375-
%3 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %2, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
376-
%4 = trunc <4 x i64> %3 to <4 x i32>
377-
store <4 x i32> %4, ptr %y, align 32
367+
%3 = trunc <4 x i64> %2 to <4 x i32>
368+
store <4 x i32> %3, ptr %y, align 32
378369
ret void
379370
}

0 commit comments

Comments
 (0)