
Commit 0c24c17

[RISCV][ISel] Use vaaddu with rounding mode rdn for ISD::AVGFLOORU. (#76550)
This patch aims to use `vaaddu` with rounding mode rdn (i.e. `vxrm[1:0] = 0b10`) for `ISD::AVGFLOORU`.

### Source code

```llvm
define <8 x i8> @vaaddu_auto(ptr %x, ptr %y, ptr %z) {
  %xv = load <8 x i8>, ptr %x, align 2
  %yv = load <8 x i8>, ptr %y, align 2
  %xzv = zext <8 x i8> %xv to <8 x i16>
  %yzv = zext <8 x i8> %yv to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}
```

### Before this patch

```asm
vaaddu_auto:
        vsetivli zero, 8, e8, mf2, ta, ma
        vle8.v v8, (a0)
        vle8.v v9, (a1)
        vwaddu.vv v10, v8, v9
        vnsrl.wi v8, v10, 1
        ret
```

### After this patch

```asm
vaaddu_auto:
        vsetivli zero, 8, e8, mf2, ta, ma
        vle8.v v8, (a0)
        vle8.v v9, (a1)
        csrwi vxrm, 2
        vaaddu.vv v8, v8, v9
        ret
```

### Note on signed averaging addition

Based on the RVV spec, there is also a signed averaging-add variant called `vaadd`. But AFAIU, no choice of rounding mode lets `vaadd` match the semantics of signed averaging addition, so this patch only introduces `vaaddu`.
1 parent ee78e03 commit 0c24c17
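
As a cross-check on the transformation described above (not part of the patch), here is a minimal scalar reference model in C++: it compares the widened floor-average written in the IR against an averaging add that keeps the carry internally and shifts right with round-down, which is what `vaaddu` computes when `vxrm` is set to rdn (`csrwi vxrm, 2`). The helper names are made up for illustration.

```cpp
// Reference-model sketch (not from the patch): check that the widened
// floor-average and a carry-preserving averaging add with round-down agree
// for every pair of e8 inputs.
#include <cassert>
#include <cstdint>

// ISD::AVGFLOORU as the IR writes it: zext to i16, add, lshr 1, trunc to i8.
static uint8_t avgflooru_widened(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b)) >> 1);
}

// vaaddu with vxrm = rdn (0b10): the sum keeps its carry bit internally and
// is shifted right by one with a rounding increment of zero (round-down).
static uint8_t vaaddu_rdn(uint8_t a, uint8_t b) {
  unsigned sum = unsigned(a) + unsigned(b); // 9-bit sum, carry not lost
  return static_cast<uint8_t>(sum >> 1);    // shift right, no rounding add
}

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      assert(avgflooru_widened(uint8_t(a), uint8_t(b)) ==
             vaaddu_rdn(uint8_t(a), uint8_t(b)));
  return 0;
}
```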

File tree

6 files changed (+482, -7 lines)


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 7 deletions
```diff
@@ -814,8 +814,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                          Custom);
       setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
-      setOperationAction(
-          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
+      setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+                          ISD::SSUBSAT, ISD::USUBSAT},
+                         VT, Legal);
 
       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
       // nodes which truncate by one power of two at a time.
@@ -1184,9 +1185,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
         setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
 
-      setOperationAction(
-          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
-          Custom);
+      setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+                          ISD::SSUBSAT, ISD::USUBSAT},
+                         VT, Custom);
 
       setOperationAction(ISD::VSELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -5465,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
   OP_CASE(UADDSAT)
   OP_CASE(SSUBSAT)
   OP_CASE(USUBSAT)
+  OP_CASE(AVGFLOORU)
   OP_CASE(FADD)
   OP_CASE(FSUB)
   OP_CASE(FMUL)
@@ -5569,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
-                    124 &&
+                    125 &&
                 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                         ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                     21 &&
@@ -5595,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
-                    124 &&
+                    125 &&
                 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
                         ISD::FIRST_TARGET_STRICTFP_OPCODE ==
                     21 &&
@@ -6459,6 +6461,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         !Subtarget.hasVInstructionsF16()))
       return SplitVectorOp(Op, DAG);
     [[fallthrough]];
+  case ISD::AVGFLOORU:
   case ISD::SADDSAT:
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
@@ -18595,6 +18598,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(UDIV_VL)
   NODE_NAME_CASE(UREM_VL)
   NODE_NAME_CASE(XOR_VL)
+  NODE_NAME_CASE(AVGFLOORU_VL)
   NODE_NAME_CASE(SADDSAT_VL)
   NODE_NAME_CASE(UADDSAT_VL)
   NODE_NAME_CASE(SSUBSAT_VL)
```
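
Most of the lowering work in this file is carried by the single `OP_CASE(AVGFLOORU)` line: `getRISCVVLOp` maps the generic node onto its VL counterpart, and the two `static_assert`s bump from 124 to 125 because `AVGFLOORU_VL` adds one more opcode to the `FIRST_VL_VECTOR_OP`..`LAST_VL_VECTOR_OP` range. A standalone sketch of how that mapping presumably works (the `OP_CASE` macro definition is outside this diff, and the mock enums below are placeholders, not LLVM's real values):

```cpp
// Sketch with mock enums: OP_CASE is presumably a token-pasting macro, so
// OP_CASE(AVGFLOORU) turns ISD::AVGFLOORU into RISCVISD::AVGFLOORU_VL, the
// node this patch declares in RISCVISelLowering.h.
#include <cstdio>

namespace ISD {
enum NodeType { AVGFLOORU, SADDSAT };
}
namespace RISCVISD {
enum NodeType { AVGFLOORU_VL = 100, SADDSAT_VL };
}

#define OP_CASE(NODE)                                                          \
  case ISD::NODE:                                                              \
    return RISCVISD::NODE##_VL;

static unsigned getRISCVVLOpSketch(unsigned Opcode) {
  switch (Opcode) {
    OP_CASE(AVGFLOORU) // the case this patch adds
    OP_CASE(SADDSAT)
  default:
    return 0;
  }
}

#undef OP_CASE

int main() {
  std::printf("%u\n", getRISCVVLOpSketch(ISD::AVGFLOORU)); // prints 100
}
```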

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 0 deletions
```diff
@@ -253,6 +253,9 @@ enum NodeType : unsigned {
   SSUBSAT_VL,
   USUBSAT_VL,
 
+  // Averaging adds of unsigned integers.
+  AVGFLOORU_VL,
+
   MULHS_VL,
   MULHU_VL,
   FADD_VL,
@@ -902,6 +905,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                             SelectionDAG &DAG) const;
   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
```

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 16 additions & 0 deletions
```diff
@@ -1131,6 +1131,22 @@ defm : VPatBinarySDNode_VV_VX_VI<uaddsat, "PseudoVSADDU">;
 defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
 defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
 
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+                         (vti.Vector vti.RegClass:$rs2)),
+              (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+                 (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+                         (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+              (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+                 (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+                 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+  }
+}
+
 // 15. Vector Mask Instructions
 
 // 15.1. Vector Mask-Register Logical Instructions
```
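
Both these SDNode patterns and the VL patterns below hard-code the rounding-mode operand of the `vaaddu` pseudo to `0b10` (rdn). The RVV spec defines the fixed-point result as `roundoff_unsigned(v, d) = (v >> d) + r` with a mode-dependent increment `r`; the C++ sketch below (helper name and bit handling are mine, following the spec) shows the four modes for `d = 1` and why only rdn, whose increment is always zero, yields the plain `floor((x + y) / 2)` that `ISD::AVGFLOORU` requires.

```cpp
// Sketch of the RVV spec's roundoff_unsigned(v, d) for d = 1, the shift
// amount vaaddu uses. v stands for the carry-preserving sum x + y.
#include <cstdint>
#include <cstdio>

static uint64_t roundoff_unsigned_d1(uint64_t v, unsigned vxrm) {
  bool v0 = v & 1;        // v[0], the bit that gets shifted out
  bool v1 = (v >> 1) & 1; // v[1]
  uint64_t r = 0;
  switch (vxrm) {
  case 0b00: r = v0; break;        // rnu: round to nearest, ties up
  case 0b01: r = v0 && v1; break;  // rne: round to nearest, ties to even
  case 0b10: r = 0; break;         // rdn: truncate (floor), what AVGFLOORU needs
  case 0b11: r = !v1 && v0; break; // rod: round to odd
  }
  return (v >> 1) + r;
}

int main() {
  // floor((3 + 4) / 2) == 3; only rdn (0b10) guarantees the floor result.
  std::printf("%llu\n",
              (unsigned long long)roundoff_unsigned_d1(3 + 4, 0b10)); // 3
}
```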

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 19 additions & 0 deletions
```diff
@@ -111,6 +111,7 @@ def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL>
 def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>;
 def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
 
+def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2306,6 +2307,24 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
 defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
 defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
 
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector vti.RegClass:$rs2),
+                                  vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+                 vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+    def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+                                  vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+                 vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+                 (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+  }
+}
+
 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
 class VPatTruncSatClipMaxMinBase<string inst,
                                  VTypeInfo vti,
```

Lines changed: 220 additions & 0 deletions
@@ -0,0 +1,220 @@ (new file)

```llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: vaaddu_vx_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
  %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i8> %ysplat to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %one = insertelement <8 x i16> poison, i16 1, i32 0
  %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i16> %add, %splat
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}


define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_sexti16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwadd.vv v10, v8, v9
; CHECK-NEXT:    vnsrl.wi v8, v10, 1
; CHECK-NEXT:    ret
  %xzv = sext <8 x i8> %x to <8 x i16>
  %yzv = sext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_zexti32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i32>
  %yzv = zext <8 x i8> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_vv_v8i8_lshr2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    vnsrl.wi v8, v10, 2
; CHECK-NEXT:    ret
  %xzv = zext <8 x i8> %x to <8 x i16>
  %yzv = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xzv, %yzv
  %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vaaddu_vv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yzv = zext <8 x i16> %y to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: vaaddu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i16> %x to <8 x i32>
  %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
  %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i16> %ysplat to <8 x i32>
  %add = add nuw nsw <8 x i32> %xzv, %yzv
  %one = insertelement <8 x i32> poison, i32 1, i32 0
  %splat = shufflevector <8 x i32> %one, <8 x i32> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i32> %add, %splat
  %ret = trunc <8 x i32> %div to <8 x i16>
  ret <8 x i16> %ret
}

define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vaaddu_vv_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yzv = zext <8 x i32> %y to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: vaaddu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i32> %x to <8 x i64>
  %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
  %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i32> %ysplat to <8 x i64>
  %add = add nuw nsw <8 x i64> %xzv, %yzv
  %one = insertelement <8 x i64> poison, i64 1, i64 0
  %splat = shufflevector <8 x i64> %one, <8 x i64> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i64> %add, %splat
  %ret = trunc <8 x i64> %div to <8 x i32>
  ret <8 x i32> %ret
}

define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; CHECK-LABEL: vaaddu_vv_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yzv = zext <8 x i64> %y to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}

define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
; CHECK-LABEL: vaaddu_vv_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
; CHECK-NEXT:    csrwi vxrm, 2
; CHECK-NEXT:    vaaddu.vv v8, v10, v8
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
  %xzv = zext <8 x i1> %x to <8 x i8>
  %yzv = zext <8 x i1> %y to <8 x i8>
  %add = add nuw nsw <8 x i8> %xzv, %yzv
  %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %ret = trunc <8 x i8> %div to <8 x i1>
  ret <8 x i1> %ret
}

define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
; RV32-LABEL: vaaddu_vx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    csrwi vxrm, 2
; RV32-NEXT:    vaaddu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vaaddu_vx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    csrwi vxrm, 2
; RV64-NEXT:    vaaddu.vx v8, v8, a0
; RV64-NEXT:    ret
  %xzv = zext <8 x i64> %x to <8 x i128>
  %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
  %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
  %yzv = zext <8 x i64> %ysplat to <8 x i128>
  %add = add nuw nsw <8 x i128> %xzv, %yzv
  %one = insertelement <8 x i128> poison, i128 1, i128 0
  %splat = shufflevector <8 x i128> %one, <8 x i128> poison, <8 x i32> zeroinitializer
  %div = lshr <8 x i128> %add, %splat
  %ret = trunc <8 x i128> %div to <8 x i64>
  ret <8 x i64> %ret
}
```
