Skip to content

Commit 87779fd

Browse files
authored
[RISCV][ISel] Remove redundant min/max in saturating truncation (#75145)
This patch closes #73424, a missed optimization similar to the X86 case reported in #68466.

## Source Code

```
define void @trunc_sat_i8i16(ptr %x, ptr %y) {
  %1 = load <8 x i16>, ptr %x, align 16
  %2 = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %1, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %2, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <8 x i16> %3 to <8 x i8>
  store <8 x i8> %4, ptr %y, align 8
  ret void
}
```

## Before this patch:

```
trunc_sat_i8i16:                        # @trunc_sat_i8i16
  vsetivli zero, 8, e16, m1, ta, ma
  vle16.v v8, (a0)
  li a0, -128
  vmax.vx v8, v8, a0
  li a0, 127
  vmin.vx v8, v8, a0
  vsetvli zero, zero, e8, mf2, ta, ma
  vnsrl.wi v8, v8, 0
  vse8.v v8, (a1)
  ret
```

## After this patch:

```
trunc_sat_i8i16:                        # @trunc_sat_i8i16
  vsetivli zero, 8, e8, mf2, ta, ma
  vle16.v v8, (a0)
  csrwi vxrm, 0
  vnclip.wi v8, v8, 0
  vse8.v v8, (a1)
  ret
```
1 parent dbd1fb8 commit 87779fd

File tree

3 files changed

+500
-122
lines changed

3 files changed

+500
-122
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2338,6 +2338,64 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
23382338
// Saturating subtract: the signed and unsigned VL nodes are mapped onto the
// PseudoVSSUB / PseudoVSSUBU pseudo families through VPatBinaryVL_VV_VX.
// NOTE(review): that multiclass is defined elsewhere; the operand forms it
// expands to are not visible from here.
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
23392339
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
23402340

2341+
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
2342+
// Folds "clamp, clamp, truncate" into a single masked narrowing-clip pseudo.
//
// Source pattern:
//   riscv_trunc_vector_vl(
//     op1(op2($rs1, splat(op2_value)), splat(op1_value)))
// where each splat is a riscv_vmv_v_x_vl with an undef passthru whose own VL
// operand is left unconstrained (srcvalue). Both clamp nodes have an undef
// passthru, and every node in the tree uses mask V0 and VLOpFrag.
//
// Template arguments:
//   inst            - pseudo name prefix; "_WI_" # vti.LMul.MX # "_MASK" is
//                     appended to form the selected instruction.
//   vti             - type info of the narrow (result) vector.
//   wti             - type info of the wide (source) vector.
//   op1, op1_value  - outer clamp node and the scalar bound it is splatted with.
//   op2, op2_value  - inner clamp node and the scalar bound it is splatted with.
class VPatTruncSatClipMaxMinBase<string inst,
2343+
VTypeInfo vti,
2344+
VTypeInfo wti,
2345+
SDPatternOperator op1,
2346+
int op1_value,
2347+
SDPatternOperator op2,
2348+
int op2_value> :
2349+
Pat<(vti.Vector (riscv_trunc_vector_vl
2350+
(wti.Vector (op1
2351+
(wti.Vector (op2
2352+
(wti.Vector wti.RegClass:$rs1),
2353+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
2354+
(wti.Vector undef),(wti.Mask V0), VLOpFrag)),
2355+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
2356+
(wti.Vector undef), (wti.Mask V0), VLOpFrag)),
2357+
(vti.Mask V0), VLOpFrag)),
2358+
// Result: both clamps disappear and $rs1 feeds the pseudo directly, with an
// IMPLICIT_DEF passthru and a tail-agnostic/mask-agnostic policy. The 0 after
// $rs1 is the immediate of the _WI form. The 0 after the mask is presumably
// the rounding-mode operand -- TODO confirm against the pseudo's operand list.
(!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
2359+
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
2360+
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
2361+
2362+
// Folds "unsigned min, truncate" into a single masked PseudoVNCLIPU_WI pseudo.
//
// Source pattern:
//   riscv_trunc_vector_vl(riscv_umin_vl($rs1, splat(uminval)))
// where the splat is a riscv_vmv_v_x_vl with an undef passthru whose own VL
// operand is left unconstrained (srcvalue). The umin has an undef passthru,
// and both nodes use mask V0 and VLOpFrag.
//
// Template arguments:
//   vti     - type info of the narrow (result) vector.
//   wti     - type info of the wide (source) vector.
//   uminval - scalar bound the wide vector is clamped to before truncation.
class VPatTruncSatClipUMin<VTypeInfo vti,
2363+
VTypeInfo wti,
2364+
int uminval> :
2365+
Pat<(vti.Vector (riscv_trunc_vector_vl
2366+
(wti.Vector (riscv_umin_vl
2367+
(wti.Vector wti.RegClass:$rs1),
2368+
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
2369+
(wti.Vector undef), (wti.Mask V0), VLOpFrag)),
2370+
(vti.Mask V0), VLOpFrag)),
2371+
// Result: the umin disappears and $rs1 feeds the pseudo directly, with an
// IMPLICIT_DEF passthru and a tail-agnostic/mask-agnostic policy. The 0 after
// $rs1 is the immediate of the _WI form. The 0 after the mask is presumably
// the rounding-mode operand -- TODO confirm against the pseudo's operand list.
(!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
2372+
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
2373+
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
2374+
2375+
// Instantiates VPatTruncSatClipMaxMinBase for both nestings of the two clamp
// nodes, so the fold applies whichever clamp is outermost:
//   max(min(x, minval), maxval)   and   min(max(x, maxval), minval).
//
// Each operator stays paired with its own bound in both instantiations, so
// the result does not depend on which operator is passed in the `max` slot
// and which in the `min` slot.
multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
2376+
SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
2377+
def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
2378+
def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
2379+
}
2380+
2381+
// Emits the saturating-truncate patterns for one (narrow vti, wide wti) pair:
// the signed clamp pair in both nestings -> PseudoVNCLIP, and the unsigned
// min -> PseudoVNCLIPU.
//
// All bounds are derived from the narrow element width, sew = vti.SEW:
//   uminval = 2^sew - 1        (e.g. 255 for sew = 8)
//   sminval = 2^(sew-1) - 1    (e.g. 127 for sew = 8)
//   smaxval = -2^(sew-1)       (e.g. -128 for sew = 8)
// Note the naming: sminval is the bound paired with riscv_smin_vl (the upper
// limit) and smaxval is the bound paired with riscv_smax_vl (the lower limit).
multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
2382+
defvar sew = vti.SEW;
2383+
defvar uminval = !sub(!shl(1, sew), 1);
2384+
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
2385+
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
2386+
2387+
// The patterns are guarded by the predicates of both the narrow and the wide
// vector type.
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
2388+
GetVTypePredicates<wti>.Predicates) in {
2389+
// riscv_smin_vl lands in the multiclass's `max` slot and riscv_smax_vl in its
// `min` slot; this is harmless because VPatTruncSatClipMaxMin instantiates
// both nestings and keeps each operator with its own bound.
defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
2390+
sminval, riscv_smax_vl, smaxval>;
2391+
def : VPatTruncSatClipUMin<vti, wti, uminval>;
2392+
}
2393+
2394+
}
2395+
2396+
// Instantiate the saturating-truncate patterns for every entry of
// AllWidenableIntVectors, using its Vti field as the narrow type and its Wti
// field as the wide type.
foreach vtiToWti = AllWidenableIntVectors in
2397+
defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>;
2398+
23412399
// 13. Vector Floating-Point Instructions
23422400

23432401
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions

0 commit comments

Comments
 (0)