
Commit 0c24c17

[RISCV][ISel] Use vaaddu with rounding mode rdn for ISD::AVGFLOORU. (#76550)
This patch aims to use `vaaddu` with rounding mode rdn (i.e. `vxrm[1:0] = 0b10`) for `ISD::AVGFLOORU`.

### Source code

```llvm
define <8 x i8> @vaaddu_auto(ptr %x, ptr %y, ptr %z) {
  %xv = load <8 x i8>, ptr %x, align 2
  %yv = load <8 x i8>, ptr %y, align 2
  %xzv = zext <8 x i8> %xv to <8 x i16>
  %yzv = zext <8 x i8> %yv to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}
```

### Before this patch

```asm
vaaddu_auto:
        vsetivli zero, 8, e8, mf2, ta, ma
        vle8.v v8, (a0)
        vle8.v v9, (a1)
        vwaddu.vv v10, v8, v9
        vnsrl.wi v8, v10, 1
        ret
```

### After this patch

```asm
vaaddu_auto:
        vsetivli zero, 8, e8, mf2, ta, ma
        vle8.v v8, (a0)
        vle8.v v9, (a1)
        csrwi vxrm, 2
        vaaddu.vv v8, v8, v9
        ret
```

### Note on signed averaging addition

Based on the RVV spec, there is also a signed averaging-add variant called `vaadd`. But AFAIU, no choice of rounding mode lets `vaadd` match the semantics of signed averaging addition, so this patch only introduces `vaaddu`.
1 parent ee78e03 commit 0c24c17
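
As a cross-check on the transformation described above (not part of the patch), here is a minimal scalar reference model in C++: it compares the widened floor-average written in the IR against an averaging add that keeps the carry internally and shifts right with round-down, which is what `vaaddu` computes when `vxrm` is set to rdn (`csrwi vxrm, 2`). The helper names are made up for illustration.

```cpp
// Reference-model sketch (not from the patch): check that the widened
// floor-average and a carry-preserving averaging add with round-down agree
// for every pair of e8 inputs.
#include <cassert>
#include <cstdint>

// ISD::AVGFLOORU as the IR writes it: zext to i16, add, lshr 1, trunc to i8.
static uint8_t avgflooru_widened(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b)) >> 1);
}

// vaaddu with vxrm = rdn (0b10): the sum keeps its carry bit internally and
// is shifted right by one with a rounding increment of zero (round-down).
static uint8_t vaaddu_rdn(uint8_t a, uint8_t b) {
  unsigned sum = unsigned(a) + unsigned(b); // 9-bit sum, carry not lost
  return static_cast<uint8_t>(sum >> 1);    // shift right, no rounding add
}

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      assert(avgflooru_widened(uint8_t(a), uint8_t(b)) ==
             vaaddu_rdn(uint8_t(a), uint8_t(b)));
  return 0;
}
```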

File tree

6 files changed (+482, -7 lines)


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 7 deletions
```diff
@@ -814,8 +814,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                          Custom);
       setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
-      setOperationAction(
-          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
+      setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+                          ISD::SSUBSAT, ISD::USUBSAT},
+                         VT, Legal);
 
       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
       // nodes which truncate by one power of two at a time.
@@ -1184,9 +1185,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
         setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
 
-      setOperationAction(
-          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
-          Custom);
+      setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+                          ISD::SSUBSAT, ISD::USUBSAT},
+                         VT, Custom);
 
       setOperationAction(ISD::VSELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -5465,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
   OP_CASE(UADDSAT)
   OP_CASE(SSUBSAT)
   OP_CASE(USUBSAT)
+  OP_CASE(AVGFLOORU)
   OP_CASE(FADD)
   OP_CASE(FSUB)
   OP_CASE(FMUL)
@@ -5569,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
-                    124 &&
+                    125 &&
                 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                         ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                     21 &&
@@ -5595,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
-                    124 &&
+                    125 &&
                 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                         ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                     21 &&
@@ -6459,6 +6461,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         !Subtarget.hasVInstructionsF16()))
       return SplitVectorOp(Op, DAG);
     [[fallthrough]];
+  case ISD::AVGFLOORU:
   case ISD::SADDSAT:
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
@@ -18595,6 +18598,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(UDIV_VL)
   NODE_NAME_CASE(UREM_VL)
   NODE_NAME_CASE(XOR_VL)
+  NODE_NAME_CASE(AVGFLOORU_VL)
   NODE_NAME_CASE(SADDSAT_VL)
   NODE_NAME_CASE(UADDSAT_VL)
   NODE_NAME_CASE(SSUBSAT_VL)
```
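
Most of the lowering work in this file is carried by the single `OP_CASE(AVGFLOORU)` line: `getRISCVVLOp` maps the generic node onto its VL counterpart, and the two `static_assert`s bump from 124 to 125 because `AVGFLOORU_VL` adds one more opcode to the `FIRST_VL_VECTOR_OP`..`LAST_VL_VECTOR_OP` range. A standalone sketch of how that mapping presumably works (the `OP_CASE` macro definition is outside this diff, and the mock enums below are placeholders, not LLVM's real values):

```cpp
// Sketch with mock enums: OP_CASE is presumably a token-pasting macro, so
// OP_CASE(AVGFLOORU) turns ISD::AVGFLOORU into RISCVISD::AVGFLOORU_VL, the
// node this patch declares in RISCVISelLowering.h.
#include <cstdio>

namespace ISD {
enum NodeType { AVGFLOORU, SADDSAT };
}
namespace RISCVISD {
enum NodeType { AVGFLOORU_VL = 100, SADDSAT_VL };
}

#define OP_CASE(NODE)                                                          \
  case ISD::NODE:                                                              \
    return RISCVISD::NODE##_VL;

static unsigned getRISCVVLOpSketch(unsigned Opcode) {
  switch (Opcode) {
    OP_CASE(AVGFLOORU) // the case this patch adds
    OP_CASE(SADDSAT)
  default:
    return 0;
  }
}

#undef OP_CASE

int main() {
  std::printf("%u\n", getRISCVVLOpSketch(ISD::AVGFLOORU)); // prints 100
}
```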

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 0 deletions
```diff
@@ -253,6 +253,9 @@ enum NodeType : unsigned {
   SSUBSAT_VL,
   USUBSAT_VL,
 
+  // Averaging adds of unsigned integers.
+  AVGFLOORU_VL,
+
   MULHS_VL,
   MULHU_VL,
   FADD_VL,
@@ -902,6 +905,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                             SelectionDAG &DAG) const;
   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
```

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 16 additions & 0 deletions
```diff
@@ -1131,6 +1131,22 @@ defm : VPatBinarySDNode_VV_VX_VI<uaddsat, "PseudoVSADDU">;
 defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
 defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
 
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+                         (vti.Vector vti.RegClass:$rs2)),
+              (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+                 (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+                         (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+              (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+                 (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+                 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+  }
+}
+
 // 15. Vector Mask Instructions
 
 // 15.1. Vector Mask-Register Logical Instructions
```
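
Both these SDNode patterns and the VL patterns below hard-code the rounding-mode operand of the `vaaddu` pseudo to `0b10` (rdn). The RVV spec defines the fixed-point result as `roundoff_unsigned(v, d) = (v >> d) + r` with a mode-dependent increment `r`; the C++ sketch below (helper name and bit handling are mine, following the spec) shows the four modes for `d = 1` and why only rdn, whose increment is always zero, yields the plain `floor((x + y) / 2)` that `ISD::AVGFLOORU` requires.

```cpp
// Sketch of the RVV spec's roundoff_unsigned(v, d) for d = 1, the shift
// amount vaaddu uses. v stands for the carry-preserving sum x + y.
#include <cstdint>
#include <cstdio>

static uint64_t roundoff_unsigned_d1(uint64_t v, unsigned vxrm) {
  bool v0 = v & 1;        // v[0], the bit that gets shifted out
  bool v1 = (v >> 1) & 1; // v[1]
  uint64_t r = 0;
  switch (vxrm) {
  case 0b00: r = v0; break;        // rnu: round to nearest, ties up
  case 0b01: r = v0 && v1; break;  // rne: round to nearest, ties to even
  case 0b10: r = 0; break;         // rdn: truncate (floor), what AVGFLOORU needs
  case 0b11: r = !v1 && v0; break; // rod: round to odd
  }
  return (v >> 1) + r;
}

int main() {
  // floor((3 + 4) / 2) == 3; only rdn (0b10) guarantees the floor result.
  std::printf("%llu\n",
              (unsigned long long)roundoff_unsigned_d1(3 + 4, 0b10)); // 3
}
```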

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 19 additions & 0 deletions
```diff
@@ -111,6 +111,7 @@ def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL>
 def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>;
 def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
 
+def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2306,6 +2307,24 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
 defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
 defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
 
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector vti.RegClass:$rs2),
+                                  vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+                 vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+    def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+                                  vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+                 vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+                 (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+  }
+}
+
 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
 class VPatTruncSatClipMaxMinBase<string inst,
                                  VTypeInfo vti,
```

Lines changed: 220 additions & 0 deletions
@@ -0,0 +1,220 @@ (new file)

```llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: vaaddu_vx_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
  %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i8> %ysplat to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %one = insertelement <8 x i16> poison, i16 1, i32 0
  %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i16> %add, %splat
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}


define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_sexti16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwadd.vv v10, v8, v9
; CHECK-NEXT:    vnsrl.wi v8, v10, 1
; CHECK-NEXT:    ret
  %xzv = sext <8 x i8> %x to <8 x i16>
  %yzv = sext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_zexti32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i32>
  %yzv = zext <8 x i8> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_lshr2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    vnsrl.wi v8, v10, 2
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vaaddu_vv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yzv = zext <8 x i16> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vaaddu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
  %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i16> %ysplat to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %one = insertelement <8 x i32> poison, i32 1, i32 0
  %splat = shufflevector <8 x i32> %one, <8 x i32> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i32> %add, %splat
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vaaddu_vv_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yzv = zext <8 x i32> %y to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: vaaddu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
  %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i32> %ysplat to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %one = insertelement <8 x i64> poison, i64 1, i64 0
  %splat = shufflevector <8 x i64> %one, <8 x i64> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i64> %add, %splat
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; CHECK-LABEL: vaaddu_vv_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yzv = zext <8 x i64> %y to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
; CHECK-LABEL: vaaddu_vv_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v10, v8
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i1> %x to <8 x i8>
  %yzv = zext <8 x i1> %y to <8 x i8>
  %add = add nuw nsw <8 x i8> %xzv, %yzv
  %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %ret = trunc <8 x i8> %div to <8 x i1>
  ret <8 x i1> %ret
}

define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
; RV32-LABEL: vaaddu_vx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    csrwi vxrm, 2
; RV32-NEXT:    vaaddu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vaaddu_vx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    csrwi vxrm, 2
; RV64-NEXT:    vaaddu.vx v8, v8, a0
; RV64-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
  %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i64> %ysplat to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %one = insertelement <8 x i128> poison, i128 1, i128 0
  %splat = shufflevector <8 x i128> %one, <8 x i128> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i128> %add, %splat
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}
```
