Skip to content

Commit 555e030

Browse files
LWenH authored and topperc committed
[RISCV] Match ext + ext + srem + trunc to vrem.vv
This patch matches the SDNode pattern "trunc (srem (sext, sext))" to vrem.vv. This removes the extra "vsext", "vnsrl" and "vsetvli" instructions in cases like "c[i] = a[i] % b[i]", where the element types in the arrays are all int8_t or int16_t. For unsigned element types like uint8_t or uint16_t, the corresponding redundant "zext + zext + urem + trunc" IR has already been removed during the InstCombine pass, because the urem operation cannot overflow in LLVM. However, for signed types, the InstCombine pass cannot remove such patterns due to the potential for undefined behavior in LLVM IR. For example, -128 % -1 leads to undefined behavior (overflow) for the i8 type in LLVM IR, but this situation does not occur for i32. To address this, LLVM first sign-extends the srem operands to i32 to prevent the UB. For RVV, such overflowing operations are defined by the specification and yield deterministic results for extreme inputs: for example, per the spec, -128 % -1 for the i8 type produces 0 in the overflow situation. Therefore, such patterns can be matched in the instruction selection phase for the RVV backend rather than being removed in target-independent optimization passes like InstCombine. This patch only handles the sign_extend case for srem. For more information about the C test cases compared with GCC, please see: https://gcc.godbolt.org/z/MWzE7WaT4 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D156685
1 parent 6cb55a3 commit 555e030

File tree

2 files changed

+39
-66
lines changed

2 files changed

+39
-66
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,23 @@ defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV", isSEWAware=1>;
10521052
defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU", isSEWAware=1>;
10531053
defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM", isSEWAware=1>;
10541054

1055 +
1056 + foreach vtiTowti = AllWidenableIntVectors in {
1057 +   defvar vti = vtiTowti.Vti;
1058 +   defvar wti = vtiTowti.Wti;
1059 +   let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
1060 +                                GetVTypePredicates<wti>.Predicates) in {
1061 +     def : Pat<
1062 +       (vti.Vector
1063 +         (riscv_trunc_vector_vl
1064 +           (srem (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))),
1065 +                 (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs2)))),
1066 +           (vti.Mask true_mask), (XLenVT srcvalue))),
1067 +       (!cast<Instruction>("PseudoVREM_VV_"#vti.LMul.MX#"_E"#!shl(1, vti.Log2SEW))
1068 +         (vti.Vector (IMPLICIT_DEF)),
1069 +         vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
1070 +   }
1071 + }
10551072
// 11.12. Vector Widening Integer Multiply Instructions
10561073
defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse,
10571074
"PseudoVWMUL">;

llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll

Lines changed: 22 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,8 @@ define <vscale x 1 x i8> @vrem_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
4848
define <vscale x 1 x i8> @vrem_vv_nxv1i8_sext_twice(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
4949
; CHECK-LABEL: vrem_vv_nxv1i8_sext_twice:
5050
; CHECK: # %bb.0:
51-
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
52-
; CHECK-NEXT: vsext.vf2 v10, v8
53-
; CHECK-NEXT: vsext.vf2 v8, v9
54-
; CHECK-NEXT: vrem.vv v8, v10, v8
55-
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
56-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
51+
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
52+
; CHECK-NEXT: vrem.vv v8, v8, v9
5753
; CHECK-NEXT: ret
5854
%sext_va = sext <vscale x 1 x i8> %va to <vscale x 1 x i16>
5955
%sext_vb = sext <vscale x 1 x i8> %vb to <vscale x 1 x i16>
@@ -106,12 +102,8 @@ define <vscale x 2 x i8> @vrem_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
106102
define <vscale x 2 x i8> @vrem_vv_nxv2i8_sext_twice(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
107103
; CHECK-LABEL: vrem_vv_nxv2i8_sext_twice:
108104
; CHECK: # %bb.0:
109-
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
110-
; CHECK-NEXT: vsext.vf2 v10, v8
111-
; CHECK-NEXT: vsext.vf2 v8, v9
112-
; CHECK-NEXT: vrem.vv v8, v10, v8
113-
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
114-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
105+
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
106+
; CHECK-NEXT: vrem.vv v8, v8, v9
115107
; CHECK-NEXT: ret
116108
%sext_va = sext <vscale x 2 x i8> %va to <vscale x 2 x i16>
117109
%sext_vb = sext <vscale x 2 x i8> %vb to <vscale x 2 x i16>
@@ -164,12 +156,8 @@ define <vscale x 4 x i8> @vrem_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
164156
define <vscale x 4 x i8> @vrem_vv_nxv4i8_sext_twice(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
165157
; CHECK-LABEL: vrem_vv_nxv4i8_sext_twice:
166158
; CHECK: # %bb.0:
167-
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
168-
; CHECK-NEXT: vsext.vf2 v10, v8
169-
; CHECK-NEXT: vsext.vf2 v8, v9
170-
; CHECK-NEXT: vrem.vv v8, v10, v8
171-
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
172-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
159+
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
160+
; CHECK-NEXT: vrem.vv v8, v8, v9
173161
; CHECK-NEXT: ret
174162
%sext_va = sext <vscale x 4 x i8> %va to <vscale x 4 x i16>
175163
%sext_vb = sext <vscale x 4 x i8> %vb to <vscale x 4 x i16>
@@ -222,12 +210,8 @@ define <vscale x 8 x i8> @vrem_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
222210
define <vscale x 8 x i8> @vrem_vv_nxv8i8_sext_twice(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
223211
; CHECK-LABEL: vrem_vv_nxv8i8_sext_twice:
224212
; CHECK: # %bb.0:
225-
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
226-
; CHECK-NEXT: vsext.vf2 v10, v8
227-
; CHECK-NEXT: vsext.vf2 v12, v9
228-
; CHECK-NEXT: vrem.vv v10, v10, v12
229-
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
230-
; CHECK-NEXT: vnsrl.wi v8, v10, 0
213+
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
214+
; CHECK-NEXT: vrem.vv v8, v8, v9
231215
; CHECK-NEXT: ret
232216
%sext_va = sext <vscale x 8 x i8> %va to <vscale x 8 x i16>
233217
%sext_vb = sext <vscale x 8 x i8> %vb to <vscale x 8 x i16>
@@ -280,12 +264,8 @@ define <vscale x 16 x i8> @vrem_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
280264
define <vscale x 16 x i8> @vrem_vv_nxv16i8_sext_twice(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb) {
281265
; CHECK-LABEL: vrem_vv_nxv16i8_sext_twice:
282266
; CHECK: # %bb.0:
283-
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
284-
; CHECK-NEXT: vsext.vf2 v12, v8
285-
; CHECK-NEXT: vsext.vf2 v16, v10
286-
; CHECK-NEXT: vrem.vv v12, v12, v16
287-
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
288-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
267+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
268+
; CHECK-NEXT: vrem.vv v8, v8, v10
289269
; CHECK-NEXT: ret
290270
%sext_va = sext <vscale x 16 x i8> %va to <vscale x 16 x i16>
291271
%sext_vb = sext <vscale x 16 x i8> %vb to <vscale x 16 x i16>
@@ -338,12 +318,8 @@ define <vscale x 32 x i8> @vrem_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
338318
define <vscale x 32 x i8> @vrem_vv_nxv32i8_sext_twice(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb) {
339319
; CHECK-LABEL: vrem_vv_nxv32i8_sext_twice:
340320
; CHECK: # %bb.0:
341-
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
342-
; CHECK-NEXT: vsext.vf2 v16, v8
343-
; CHECK-NEXT: vsext.vf2 v24, v12
344-
; CHECK-NEXT: vrem.vv v16, v16, v24
345-
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
346-
; CHECK-NEXT: vnsrl.wi v8, v16, 0
321+
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
322+
; CHECK-NEXT: vrem.vv v8, v8, v12
347323
; CHECK-NEXT: ret
348324
%sext_va = sext <vscale x 32 x i8> %va to <vscale x 32 x i16>
349325
%sext_vb = sext <vscale x 32 x i8> %vb to <vscale x 32 x i16>
@@ -450,12 +426,8 @@ define <vscale x 1 x i16> @vrem_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
450426
define <vscale x 1 x i16> @vrem_vv_nxv1i16_sext_twice(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
451427
; CHECK-LABEL: vrem_vv_nxv1i16_sext_twice:
452428
; CHECK: # %bb.0:
453-
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
454-
; CHECK-NEXT: vsext.vf2 v10, v8
455-
; CHECK-NEXT: vsext.vf2 v8, v9
456-
; CHECK-NEXT: vrem.vv v8, v10, v8
457-
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
458-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
429+
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
430+
; CHECK-NEXT: vrem.vv v8, v8, v9
459431
; CHECK-NEXT: ret
460432
%sext_va = sext <vscale x 1 x i16> %va to <vscale x 1 x i32>
461433
%sext_vb = sext <vscale x 1 x i16> %vb to <vscale x 1 x i32>
@@ -521,12 +493,8 @@ define <vscale x 2 x i16> @vrem_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
521493
define <vscale x 2 x i16> @vrem_vv_nxv2i16_sext_twice(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
522494
; CHECK-LABEL: vrem_vv_nxv2i16_sext_twice:
523495
; CHECK: # %bb.0:
524-
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
525-
; CHECK-NEXT: vsext.vf2 v10, v8
526-
; CHECK-NEXT: vsext.vf2 v8, v9
527-
; CHECK-NEXT: vrem.vv v8, v10, v8
528-
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
529-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
496+
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
497+
; CHECK-NEXT: vrem.vv v8, v8, v9
530498
; CHECK-NEXT: ret
531499
%sext_va = sext <vscale x 2 x i16> %va to <vscale x 2 x i32>
532500
%sext_vb = sext <vscale x 2 x i16> %vb to <vscale x 2 x i32>
@@ -592,12 +560,8 @@ define <vscale x 4 x i16> @vrem_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
592560
define <vscale x 4 x i16> @vrem_vv_nxv4i16_sext_twice(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
593561
; CHECK-LABEL: vrem_vv_nxv4i16_sext_twice:
594562
; CHECK: # %bb.0:
595-
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
596-
; CHECK-NEXT: vsext.vf2 v10, v8
597-
; CHECK-NEXT: vsext.vf2 v12, v9
598-
; CHECK-NEXT: vrem.vv v10, v10, v12
599-
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
600-
; CHECK-NEXT: vnsrl.wi v8, v10, 0
563+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
564+
; CHECK-NEXT: vrem.vv v8, v8, v9
601565
; CHECK-NEXT: ret
602566
%sext_va = sext <vscale x 4 x i16> %va to <vscale x 4 x i32>
603567
%sext_vb = sext <vscale x 4 x i16> %vb to <vscale x 4 x i32>
@@ -663,12 +627,8 @@ define <vscale x 8 x i16> @vrem_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
663627
define <vscale x 8 x i16> @vrem_vv_nxv8i16_sext_twice(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
664628
; CHECK-LABEL: vrem_vv_nxv8i16_sext_twice:
665629
; CHECK: # %bb.0:
666-
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
667-
; CHECK-NEXT: vsext.vf2 v12, v8
668-
; CHECK-NEXT: vsext.vf2 v16, v10
669-
; CHECK-NEXT: vrem.vv v12, v12, v16
670-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
671-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
630+
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
631+
; CHECK-NEXT: vrem.vv v8, v8, v10
672632
; CHECK-NEXT: ret
673633
%sext_va = sext <vscale x 8 x i16> %va to <vscale x 8 x i32>
674634
%sext_vb = sext <vscale x 8 x i16> %vb to <vscale x 8 x i32>
@@ -734,12 +694,8 @@ define <vscale x 16 x i16> @vrem_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
734694
define <vscale x 16 x i16> @vrem_vv_nxv16i16_sext_twice(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) {
735695
; CHECK-LABEL: vrem_vv_nxv16i16_sext_twice:
736696
; CHECK: # %bb.0:
737-
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
738-
; CHECK-NEXT: vsext.vf2 v16, v8
739-
; CHECK-NEXT: vsext.vf2 v24, v12
740-
; CHECK-NEXT: vrem.vv v16, v16, v24
741-
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
742-
; CHECK-NEXT: vnsrl.wi v8, v16, 0
697+
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
698+
; CHECK-NEXT: vrem.vv v8, v8, v12
743699
; CHECK-NEXT: ret
744700
%sext_va = sext <vscale x 16 x i16> %va to <vscale x 16 x i32>
745701
%sext_vb = sext <vscale x 16 x i16> %vb to <vscale x 16 x i32>

0 commit comments

Comments
 (0)