[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with `zvfbfmin` #101611

4vtomat · 2024-08-02T04:10:07Z

These two intrinsics are supported for f16 with zvfhmin, also support
them in bf16 to make it aligned to f16.

This resolve: riscv-non-isa/rvv-intrinsic-doc#349

These two intrinsics are supported for f16 with `zvfhmin`, also support them in bf16 to make it aligned to f16.

llvmbot · 2024-08-02T04:10:39Z

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

Changes

These two intrinsics are supported for f16 with zvfhmin, also support
them in bf16 to make it aligned to f16.

Patch is 24.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101611.diff

6 Files Affected:

(modified) clang/include/clang/Basic/riscv_vector.td (+6)
(modified) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmerge.c (+60-1)
(modified) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmerge.c (+61-1)
(modified) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vmerge.c (+61-1)
(modified) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vmerge.c (+61-1)
(modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+8)

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index a0820e2093bc2..e4e936f914f6f 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -1378,6 +1378,9 @@ let HasMasked = false,
     let RequiredFeatures = ["Zvfhmin"] in
       defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "x",
                                     [["v", "v", "vv"]]>;
+    let RequiredFeatures = ["Zvfbfmin"] in
+      defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "y",
+                                    [["v", "v", "vv"]]>;
   let SupportOverloading = false in
     defm vmv_v : RVVOutBuiltinSet<"vmv_v_x", "csil",
                                    [["x", "v", "ve"],
@@ -1890,6 +1893,9 @@ let HasMasked = false,
   let RequiredFeatures = ["Zvfhmin"] in
     defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "x",
                                       [["vvm", "v", "vvvm"]]>;
+  let RequiredFeatures = ["Zvfbfmin"] in
+    defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "y",
+                                      [["vvm", "v", "vvvm"]]>;
   defm vfmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
                                      [["vfm", "v", "vvem"]]>;
 }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmerge.c
index fb41a07cccec0..d2eb01c70950e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmerge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfhmin -disable-O0-optnone  \
+// RUN:   -target-feature +zvfhmin -target-feature +zvfbfmin -disable-O0-optnone  \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
@@ -1037,3 +1037,62 @@ vfloat64m8_t test_vmerge_vvm_f64m8(vfloat64m8_t op1, vfloat64m8_t op2, vbool8_t
   return __riscv_vmerge_vvm_f64m8(op1, op2, mask, vl);
 }
 
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vmerge_vvm_bf16mf4
+// CHECK-RV64-SAME: (<vscale x 1 x bfloat> [[OP1:%.*]], <vscale x 1 x bfloat> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vmerge.nxv1bf16.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[OP1]], <vscale x 1 x bfloat> [[OP2]], <vscale x 1 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vmerge_vvm_bf16mf4(vbfloat16mf4_t op1, vbfloat16mf4_t op2, vbool64_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16mf4(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vmerge_vvm_bf16mf2
+// CHECK-RV64-SAME: (<vscale x 2 x bfloat> [[OP1:%.*]], <vscale x 2 x bfloat> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vmerge.nxv2bf16.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[OP1]], <vscale x 2 x bfloat> [[OP2]], <vscale x 2 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vmerge_vvm_bf16mf2(vbfloat16mf2_t op1, vbfloat16mf2_t op2, vbool32_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16mf2(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vmerge_vvm_bf16m1
+// CHECK-RV64-SAME: (<vscale x 4 x bfloat> [[OP1:%.*]], <vscale x 4 x bfloat> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vmerge.nxv4bf16.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[OP1]], <vscale x 4 x bfloat> [[OP2]], <vscale x 4 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vmerge_vvm_bf16m1(vbfloat16m1_t op1, vbfloat16m1_t op2, vbool16_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m1(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vmerge_vvm_bf16m2
+// CHECK-RV64-SAME: (<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vmerge.nxv8bf16.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[OP1]], <vscale x 8 x bfloat> [[OP2]], <vscale x 8 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vmerge_vvm_bf16m2(vbfloat16m2_t op1, vbfloat16m2_t op2, vbool8_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m2(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vmerge_vvm_bf16m4
+// CHECK-RV64-SAME: (<vscale x 16 x bfloat> [[OP1:%.*]], <vscale x 16 x bfloat> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vmerge.nxv16bf16.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[OP1]], <vscale x 16 x bfloat> [[OP2]], <vscale x 16 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vmerge_vvm_bf16m4(vbfloat16m4_t op1, vbfloat16m4_t op2, vbool4_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m4(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vmerge_vvm_bf16m8
+// CHECK-RV64-SAME: (<vscale x 32 x bfloat> [[OP1:%.*]], <vscale x 32 x bfloat> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vmerge.nxv32bf16.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[OP1]], <vscale x 32 x bfloat> [[OP2]], <vscale x 32 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vmerge_vvm_bf16m8(vbfloat16m8_t op1, vbfloat16m8_t op2, vbool2_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m8(op1, op2, mask, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmerge.c
index d67aa70fc8c8a..2db27fdc512e5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmerge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfhmin -disable-O0-optnone  \
+// RUN:   -target-feature +zvfhmin -target-feature +zvfbfmin -disable-O0-optnone  \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
@@ -1037,3 +1037,63 @@ vfloat64m8_t test_vmerge_vvm_f64m8(vfloat64m8_t op1, vfloat64m8_t op2, vbool8_t
   return __riscv_vmerge(op1, op2, mask, vl);
 }
 
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vmerge_vvm_bf16mf4
+// CHECK-RV64-SAME: (<vscale x 1 x bfloat> [[OP1:%.*]], <vscale x 1 x bfloat> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vmerge.nxv1bf16.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[OP1]], <vscale x 1 x bfloat> [[OP2]], <vscale x 1 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vmerge_vvm_bf16mf4(vbfloat16mf4_t op1, vbfloat16mf4_t op2, vbool64_t mask, size_t vl) {
+  return __riscv_vmerge(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vmerge_vvm_bf16mf2
+// CHECK-RV64-SAME: (<vscale x 2 x bfloat> [[OP1:%.*]], <vscale x 2 x bfloat> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vmerge.nxv2bf16.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[OP1]], <vscale x 2 x bfloat> [[OP2]], <vscale x 2 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vmerge_vvm_bf16mf2(vbfloat16mf2_t op1, vbfloat16mf2_t op2, vbool32_t mask, size_t vl) {
+  return __riscv_vmerge(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vmerge_vvm_bf16m1
+// CHECK-RV64-SAME: (<vscale x 4 x bfloat> [[OP1:%.*]], <vscale x 4 x bfloat> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vmerge.nxv4bf16.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[OP1]], <vscale x 4 x bfloat> [[OP2]], <vscale x 4 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vmerge_vvm_bf16m1(vbfloat16m1_t op1, vbfloat16m1_t op2, vbool16_t mask, size_t vl) {
+  return __riscv_vmerge(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vmerge_vvm_bf16m2
+// CHECK-RV64-SAME: (<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vmerge.nxv8bf16.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[OP1]], <vscale x 8 x bfloat> [[OP2]], <vscale x 8 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vmerge_vvm_bf16m2(vbfloat16m2_t op1, vbfloat16m2_t op2, vbool8_t mask, size_t vl) {
+  return __riscv_vmerge(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vmerge_vvm_bf16m4
+// CHECK-RV64-SAME: (<vscale x 16 x bfloat> [[OP1:%.*]], <vscale x 16 x bfloat> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vmerge.nxv16bf16.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[OP1]], <vscale x 16 x bfloat> [[OP2]], <vscale x 16 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vmerge_vvm_bf16m4(vbfloat16m4_t op1, vbfloat16m4_t op2, vbool4_t mask, size_t vl) {
+  return __riscv_vmerge(op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vmerge_vvm_bf16m8
+// CHECK-RV64-SAME: (<vscale x 32 x bfloat> [[OP1:%.*]], <vscale x 32 x bfloat> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vmerge.nxv32bf16.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[OP1]], <vscale x 32 x bfloat> [[OP2]], <vscale x 32 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vmerge_vvm_bf16m8(vbfloat16m8_t op1, vbfloat16m8_t op2, vbool2_t mask, size_t vl) {
+  return __riscv_vmerge(op1, op2, mask, vl);
+}
+
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vmerge.c
index 4f723c579597e..7667c375937bd 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vmerge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfhmin -disable-O0-optnone  \
+// RUN:   -target-feature +zvfhmin -target-feature +zvfbfmin -disable-O0-optnone  \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
@@ -1037,3 +1037,63 @@ vfloat64m8_t test_vmerge_vvm_f64m8_tu(vfloat64m8_t maskedoff, vfloat64m8_t op1,
   return __riscv_vmerge_vvm_f64m8_tu(maskedoff, op1, op2, mask, vl);
 }
 
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vmerge_vvm_bf16mf4_tu
+// CHECK-RV64-SAME: (<vscale x 1 x bfloat> [[MASKEDOFF:%.*]], <vscale x 1 x bfloat> [[OP1:%.*]], <vscale x 1 x bfloat> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vmerge.nxv1bf16.nxv1bf16.i64(<vscale x 1 x bfloat> [[MASKEDOFF]], <vscale x 1 x bfloat> [[OP1]], <vscale x 1 x bfloat> [[OP2]], <vscale x 1 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vmerge_vvm_bf16mf4_tu(vbfloat16mf4_t maskedoff, vbfloat16mf4_t op1, vbfloat16mf4_t op2, vbool64_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16mf4_tu(maskedoff, op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vmerge_vvm_bf16mf2_tu
+// CHECK-RV64-SAME: (<vscale x 2 x bfloat> [[MASKEDOFF:%.*]], <vscale x 2 x bfloat> [[OP1:%.*]], <vscale x 2 x bfloat> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vmerge.nxv2bf16.nxv2bf16.i64(<vscale x 2 x bfloat> [[MASKEDOFF]], <vscale x 2 x bfloat> [[OP1]], <vscale x 2 x bfloat> [[OP2]], <vscale x 2 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vmerge_vvm_bf16mf2_tu(vbfloat16mf2_t maskedoff, vbfloat16mf2_t op1, vbfloat16mf2_t op2, vbool32_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16mf2_tu(maskedoff, op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vmerge_vvm_bf16m1_tu
+// CHECK-RV64-SAME: (<vscale x 4 x bfloat> [[MASKEDOFF:%.*]], <vscale x 4 x bfloat> [[OP1:%.*]], <vscale x 4 x bfloat> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vmerge.nxv4bf16.nxv4bf16.i64(<vscale x 4 x bfloat> [[MASKEDOFF]], <vscale x 4 x bfloat> [[OP1]], <vscale x 4 x bfloat> [[OP2]], <vscale x 4 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vmerge_vvm_bf16m1_tu(vbfloat16m1_t maskedoff, vbfloat16m1_t op1, vbfloat16m1_t op2, vbool16_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m1_tu(maskedoff, op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vmerge_vvm_bf16m2_tu
+// CHECK-RV64-SAME: (<vscale x 8 x bfloat> [[MASKEDOFF:%.*]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vmerge.nxv8bf16.nxv8bf16.i64(<vscale x 8 x bfloat> [[MASKEDOFF]], <vscale x 8 x bfloat> [[OP1]], <vscale x 8 x bfloat> [[OP2]], <vscale x 8 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vmerge_vvm_bf16m2_tu(vbfloat16m2_t maskedoff, vbfloat16m2_t op1, vbfloat16m2_t op2, vbool8_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m2_tu(maskedoff, op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vmerge_vvm_bf16m4_tu
+// CHECK-RV64-SAME: (<vscale x 16 x bfloat> [[MASKEDOFF:%.*]], <vscale x 16 x bfloat> [[OP1:%.*]], <vscale x 16 x bfloat> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vmerge.nxv16bf16.nxv16bf16.i64(<vscale x 16 x bfloat> [[MASKEDOFF]], <vscale x 16 x bfloat> [[OP1]], <vscale x 16 x bfloat> [[OP2]], <vscale x 16 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vmerge_vvm_bf16m4_tu(vbfloat16m4_t maskedoff, vbfloat16m4_t op1, vbfloat16m4_t op2, vbool4_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m4_tu(maskedoff, op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vmerge_vvm_bf16m8_tu
+// CHECK-RV64-SAME: (<vscale x 32 x bfloat> [[MASKEDOFF:%.*]], <vscale x 32 x bfloat> [[OP1:%.*]], <vscale x 32 x bfloat> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vmerge.nxv32bf16.nxv32bf16.i64(<vscale x 32 x bfloat> [[MASKEDOFF]], <vscale x 32 x bfloat> [[OP1]], <vscale x 32 x bfloat> [[OP2]], <vscale x 32 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vmerge_vvm_bf16m8_tu(vbfloat16m8_t maskedoff, vbfloat16m8_t op1, vbfloat16m8_t op2, vbool2_t mask, size_t vl) {
+  return __riscv_vmerge_vvm_bf16m8_tu(maskedoff, op1, op2, mask, vl);
+}
+
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vmerge.c
index 8149be4cb2e71..ba1838b3fc097 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vmerge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfhmin -disable-O0-optnone  \
+// RUN:   -target-feature +zvfhmin -target-feature +zvfbfmin -disable-O0-optnone  \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
@@ -1037,3 +1037,63 @@ vfloat64m8_t test_vmerge_vvm_f64m8_tu(vfloat64m8_t maskedoff, vfloat64m8_t op1,
   return __riscv_vmerge_tu(maskedoff, op1, op2, mask, vl);
 }
 
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vmerge_vvm_bf16mf4_tu
+// CHECK-RV64-SAME: (<vscale x 1 x bfloat> [[MASKEDOFF:%.*]], <vscale x 1 x bfloat> [[OP1:%.*]], <vscale x 1 x bfloat> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vmerge.nxv1bf16.nxv1bf16.i64(<vscale x 1 x bfloat> [[MASKEDOFF]], <vscale x 1 x bfloat> [[OP1]], <vscale x 1 x bfloat> [[OP2]], <vscale x 1 x i1> [[MASK]], i64 [[VL]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vmerge_vvm_bf16mf4_tu(vbfloat16mf4_t maskedoff, vbfloat16mf4_t op1, vbfloat16mf4_t op2, vbool64_t mask, size_t vl) {
+  return __riscv_vmerge_tu(maskedoff, op1, op2, mask, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vmerge_vvm_bf16mf2_tu
+// CHECK-RV64-SAME: (<vscale x 2 x bfloat> [[MASKEDOFF:%.*]], <vscale x 2 x bfloat> [[OP1:%.*]], <vs...
[truncated]

topperc

LGTM

joshua-arch1 · 2024-08-02T06:13:49Z

I'm wondering why we need these intrinsics since we do not have vmv.v.v and vmerge.vvm instructions for bf16.

topperc · 2024-08-02T06:18:34Z

I'm wondering why we need these intrinsics since we do not have vmv.v.v and vmerge.vvm instructions for bf16.

vmv.v.v and vmerge.vvm don't interpret the value in the elements. They work for integer or FP or bf16. If we don't provide the intrinsics then the user needs to use vreinterpret to do a vmerge or vmv.v.v operation on a vector of bf16.

joshua-arch1 · 2024-08-02T06:34:49Z

I'm wondering why we need these intrinsics since we do not have vmv.v.v and vmerge.vvm instructions for bf16.

vmv.v.v and vmerge.vvm don't interpret the value in the elements. They work for integer or FP or bf16. If we don't provide the intrinsics then the user needs to use vreinterpret to do a vmerge or vmv.v.v operation on a vector of bf16.

I know what vmerge and vmv.v.v mean, but in a similar way, does it mean we need to add all the intrinsics for bf16 to keep consistent with other floating-point type?

topperc · 2024-08-02T06:44:58Z

I'm wondering why we need these intrinsics since we do not have vmv.v.v and vmerge.vvm instructions for bf16.

vmv.v.v and vmerge.vvm don't interpret the value in the elements. They work for integer or FP or bf16. If we don't provide the intrinsics then the user needs to use vreinterpret to do a vmerge or vmv.v.v operation on a vector of bf16.

I know what vmerge and vmv.v.v mean, but in a similar way, does it mean we need to add all the intrinsics for bf16 to keep consistent with other floating-point type?

We already have load, store, vreinterpret, lmul_ext, lmul_trunc, vget, vcreate, vundefined. I file 3 issues on inconsistencies between Zvfhmin and Zvfbfmin

riscv-non-isa/rvv-intrinsic-doc#349
riscv-non-isa/rvv-intrinsic-doc#350
riscv-non-isa/rvv-intrinsic-doc#351

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with zvfbfmin

53a1851

These two intrinsics are supported for f16 with `zvfhmin`, also support them in bf16 to make it aligned to f16.

llvmbot added clang Clang issues not falling into any other category backend:RISC-V clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Aug 2, 2024

4vtomat requested review from rofirrim, kito-cheng, topperc and wangpc-pp August 2, 2024 04:11

topperc approved these changes Aug 2, 2024

View reviewed changes

fixup! missing vmv tests

28d93b7

4vtomat merged commit 3027688 into llvm:main Aug 6, 2024
7 checks passed

4vtomat deleted the support_bf16_vmv_vmerge branch August 6, 2024 09:59

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with `zvfbfmin` #101611

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with `zvfbfmin` #101611

Uh oh!

4vtomat commented Aug 2, 2024 •

edited

Loading

Uh oh!

llvmbot commented Aug 2, 2024 •

edited

Loading

Uh oh!

topperc left a comment

Uh oh!

joshua-arch1 commented Aug 2, 2024

Uh oh!

topperc commented Aug 2, 2024 •

edited by joshua-arch1

Loading

Uh oh!

joshua-arch1 commented Aug 2, 2024

Uh oh!

topperc commented Aug 2, 2024

Uh oh!

Uh oh!

Uh oh!

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with zvfbfmin #101611

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with zvfbfmin #101611

Uh oh!

Conversation

4vtomat commented Aug 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Aug 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

topperc left a comment

Choose a reason for hiding this comment

Uh oh!

joshua-arch1 commented Aug 2, 2024

Uh oh!

topperc commented Aug 2, 2024 • edited by joshua-arch1 Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

joshua-arch1 commented Aug 2, 2024

Uh oh!

topperc commented Aug 2, 2024

Uh oh!

Uh oh!

Uh oh!

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with `zvfbfmin` #101611

[RISCV] Support bf16 vmv.v.v and vmerge.vvm intrinsics with `zvfbfmin` #101611

4vtomat commented Aug 2, 2024 •

edited

Loading

llvmbot commented Aug 2, 2024 •

edited

Loading

topperc commented Aug 2, 2024 •

edited by joshua-arch1

Loading