-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Support Xsfvqmaccdod and Xsfvqmaccqoq extensions #68295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-risc-v ChangesSiFive Int8 Matrix Multiplication Extensions Specification Patch is 185.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68295.diff 55 Files Affected:
diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td
index 6583a7eb7b2e59b..079dc7768fc0580 100644
--- a/clang/include/clang/Basic/riscv_sifive_vector.td
+++ b/clang/include/clang/Basic/riscv_sifive_vector.td
@@ -103,3 +103,27 @@ let SupportOverloading = false in {
defm sf_vc_v_fvw : RVVVCIXBuiltinSet<["si"], "UwKzUwUvFe", [-1, 0, 2, 3], UseGPR=0>;
}
}
+
+multiclass RVVVQMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
+ let OverloadedName = NAME,
+ Name = NAME,
+ HasMasked = false,
+ Log2LMUL = [0, 1, 2, 3] in
+ defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "c", suffixes_prototypes>;
+}
+
+let UnMaskedPolicyScheme = HasPolicyOperand in
+ let RequiredFeatures = ["Xsfvqmaccdod"] in {
+ defm sf_vqmaccu_2x8x2 : RVVVQMACCBuiltinSet<[["", "4", "44SUvUv"]]>;
+ defm sf_vqmacc_2x8x2 : RVVVQMACCBuiltinSet<[["", "4", "44Svv"]]>;
+ defm sf_vqmaccus_2x8x2 : RVVVQMACCBuiltinSet<[["", "4", "44SUvv"]]>;
+ defm sf_vqmaccsu_2x8x2 : RVVVQMACCBuiltinSet<[["", "4", "44SvUv"]]>;
+ }
+
+let UnMaskedPolicyScheme = HasPolicyOperand in
+ let RequiredFeatures = ["Xsfvqmaccqoq"] in {
+ defm sf_vqmaccu_4x8x4 : RVVVQMACCBuiltinSet<[["", "4", "44SUvUv"]]>;
+ defm sf_vqmacc_4x8x4 : RVVVQMACCBuiltinSet<[["", "4", "44Svv"]]>;
+ defm sf_vqmaccus_4x8x4 : RVVVQMACCBuiltinSet<[["", "4", "44SUvv"]]>;
+ defm sf_vqmaccsu_4x8x4 : RVVVQMACCBuiltinSet<[["", "4", "44SvUv"]]>;
+ }
diff --git a/clang/include/clang/Basic/riscv_vector_common.td b/clang/include/clang/Basic/riscv_vector_common.td
index 141fac9d68e6d54..b758b7c13b37ec1 100644
--- a/clang/include/clang/Basic/riscv_vector_common.td
+++ b/clang/include/clang/Basic/riscv_vector_common.td
@@ -59,6 +59,8 @@
// element type which is four times as wide as the element type of 'v'
// o: computes a vector type identical to what 'v' computes except for the
// element type which is eight times as wide as the element type of 'v'
+// 4: computes a vector type with LMUL identical to what 'v' computes except
+// for the SEW which is 4 times as wide as the SEW of 'v'
// m: computes a vector type identical to what 'v' computes except for the
// element type which is bool
// 0: void type, ignores "t"
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index 8ba57d77221dc52..cb76ef598c8ad0c 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -35,6 +35,7 @@ enum class VectorTypeModifier : uint8_t {
Widening2XVector,
Widening4XVector,
Widening8XVector,
+ SEW4XVector,
MaskVector,
Log2EEW3,
Log2EEW4,
@@ -485,14 +486,16 @@ enum RVVRequire : uint16_t {
RVV_REQ_RV64 = 1 << 0,
RVV_REQ_ZvfhminOrZvfh = 1 << 1,
RVV_REQ_Xsfvcp = 1 << 2,
- RVV_REQ_Zvbb = 1 << 3,
- RVV_REQ_Zvbc = 1 << 4,
- RVV_REQ_Zvkb = 1 << 5,
- RVV_REQ_Zvkg = 1 << 6,
- RVV_REQ_Zvkned = 1 << 7,
- RVV_REQ_Zvknha = 1 << 8,
- RVV_REQ_Zvksed = 1 << 9,
- RVV_REQ_Zvksh = 1 << 10,
+ RVV_REQ_Xsfvqmaccdod = 1 << 3,
+ RVV_REQ_Xsfvqmaccqoq = 1 << 4,
+ RVV_REQ_Zvbb = 1 << 5,
+ RVV_REQ_Zvbc = 1 << 6,
+ RVV_REQ_Zvkb = 1 << 7,
+ RVV_REQ_Zvkg = 1 << 8,
+ RVV_REQ_Zvkned = 1 << 9,
+ RVV_REQ_Zvknha = 1 << 10,
+ RVV_REQ_Zvksed = 1 << 11,
+ RVV_REQ_Zvksh = 1 << 12,
LLVM_MARK_AS_BITMASK_ENUM(RVV_REQ_Zvksh)
};
diff --git a/clang/lib/Sema/SemaRISCVVectorLookup.cpp b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
index ae584dc68719901..acdf3260007bc3a 100644
--- a/clang/lib/Sema/SemaRISCVVectorLookup.cpp
+++ b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
@@ -205,6 +205,8 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
static const std::pair<const char *, RVVRequire> FeatureCheckList[] = {
{"64bit", RVV_REQ_RV64},
{"xsfvcp", RVV_REQ_Xsfvcp},
+ {"xsfvqmaccdod", RVV_REQ_Xsfvqmaccdod},
+ {"xsfvqmaccqoq", RVV_REQ_Xsfvqmaccqoq},
{"experimental-zvbb", RVV_REQ_Zvbb},
{"experimental-zvbc", RVV_REQ_Zvbc},
{"experimental-zvkb", RVV_REQ_Zvkb},
diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
index c105db434dc43c9..ce46a3f5c662e87 100644
--- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp
+++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
@@ -411,6 +411,10 @@ PrototypeDescriptor::parsePrototypeDescriptor(
PT = BaseTypeModifier::Vector;
VTM = VectorTypeModifier::Widening8XVector;
break;
+ case '4':
+ PT = BaseTypeModifier::Vector;
+ VTM = VectorTypeModifier::SEW4XVector;
+ break;
case 'm':
PT = BaseTypeModifier::Vector;
VTM = VectorTypeModifier::MaskVector;
@@ -687,6 +691,10 @@ void RVVType::applyModifier(const PrototypeDescriptor &Transformer) {
LMUL.MulLog2LMUL(3);
Scale = LMUL.getScale(ElementBitwidth);
break;
+ case VectorTypeModifier::SEW4XVector:
+ ElementBitwidth *= 4;
+ Scale = LMUL.getScale(ElementBitwidth);
+ break;
case VectorTypeModifier::MaskVector:
ScalarType = ScalarTypeKind::Boolean;
Scale = LMUL.getScale(ElementBitwidth);
@@ -1150,7 +1158,7 @@ void RVVIntrinsic::updateNamesAndPolicy(
SmallVector<PrototypeDescriptor> parsePrototypes(StringRef Prototypes) {
SmallVector<PrototypeDescriptor> PrototypeDescriptors;
- const StringRef Primaries("evwqom0ztul");
+ const StringRef Primaries("evwqo4m0ztul");
while (!Prototypes.empty()) {
size_t Idx = 0;
// Skip over complex prototype because it could contain primitive type
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmacc_2x8x2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmacc_2x8x2.c
new file mode 100644
index 000000000000000..29ebeea7a04e089
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmacc_2x8x2.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvqmaccdod \
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_sf_vqmacc_2x8x2_i32m1
+// CHECK-RV64-SAME: (<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.sf.vqmacc.2x8x2.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 8 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_sf_vqmacc_2x8x2_i32m1(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_2x8x2_i32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_sf_vqmacc_2x8x2_i32m2
+// CHECK-RV64-SAME: (<vscale x 4 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.sf.vqmacc.2x8x2.nxv4i32.nxv8i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 16 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_sf_vqmacc_2x8x2_i32m2(vint32m2_t vd, vint8m1_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_2x8x2_i32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_sf_vqmacc_2x8x2_i32m4
+// CHECK-RV64-SAME: (<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.sf.vqmacc.2x8x2.nxv8i32.nxv8i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 32 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_sf_vqmacc_2x8x2_i32m4(vint32m4_t vd, vint8m1_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_2x8x2_i32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_sf_vqmacc_2x8x2_i32m8
+// CHECK-RV64-SAME: (<vscale x 16 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.sf.vqmacc.2x8x2.nxv16i32.nxv8i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 64 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_sf_vqmacc_2x8x2_i32m8(vint32m8_t vd, vint8m1_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_2x8x2_i32m8(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmacc_4x8x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmacc_4x8x4.c
new file mode 100644
index 000000000000000..935cb2e007d3564
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmacc_4x8x4.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvqmaccqoq \
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_sf_vqmacc_4x8x4_i32m1
+// CHECK-RV64-SAME: (<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.sf.vqmacc.4x8x4.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 8 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_sf_vqmacc_4x8x4_i32m1(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_4x8x4_i32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_sf_vqmacc_4x8x4_i32m2
+// CHECK-RV64-SAME: (<vscale x 4 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.sf.vqmacc.4x8x4.nxv4i32.nxv8i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 16 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_sf_vqmacc_4x8x4_i32m2(vint32m2_t vd, vint8m1_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_4x8x4_i32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_sf_vqmacc_4x8x4_i32m4
+// CHECK-RV64-SAME: (<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.sf.vqmacc.4x8x4.nxv8i32.nxv8i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 32 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_sf_vqmacc_4x8x4_i32m4(vint32m4_t vd, vint8m1_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_4x8x4_i32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_sf_vqmacc_4x8x4_i32m8
+// CHECK-RV64-SAME: (<vscale x 16 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.sf.vqmacc.4x8x4.nxv16i32.nxv8i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 64 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_sf_vqmacc_4x8x4_i32m8(vint32m8_t vd, vint8m1_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_sf_vqmacc_4x8x4_i32m8(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccsu_2x8x2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccsu_2x8x2.c
new file mode 100644
index 000000000000000..a1f95d2c150f29d
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccsu_2x8x2.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvqmaccdod \
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_sf_vqmaccsu_2x8x2_i32m1
+// CHECK-RV64-SAME: (<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.sf.vqmaccsu.2x8x2.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 8 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_sf_vqmaccsu_2x8x2_i32m1(vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_2x8x2_i32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_sf_vqmaccsu_2x8x2_i32m2
+// CHECK-RV64-SAME: (<vscale x 4 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.sf.vqmaccsu.2x8x2.nxv4i32.nxv8i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 16 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_sf_vqmaccsu_2x8x2_i32m2(vint32m2_t vd, vint8m1_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_2x8x2_i32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_sf_vqmaccsu_2x8x2_i32m4
+// CHECK-RV64-SAME: (<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.sf.vqmaccsu.2x8x2.nxv8i32.nxv8i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 32 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_sf_vqmaccsu_2x8x2_i32m4(vint32m4_t vd, vint8m1_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_2x8x2_i32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_sf_vqmaccsu_2x8x2_i32m8
+// CHECK-RV64-SAME: (<vscale x 16 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.sf.vqmaccsu.2x8x2.nxv16i32.nxv8i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 64 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_sf_vqmaccsu_2x8x2_i32m8(vint32m8_t vd, vint8m1_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_2x8x2_i32m8(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccsu_4x8x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccsu_4x8x4.c
new file mode 100644
index 000000000000000..f34517b24bcf205
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccsu_4x8x4.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvqmaccqoq \
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_sf_vqmaccsu_4x8x4_i32m1
+// CHECK-RV64-SAME: (<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.sf.vqmaccsu.4x8x4.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 8 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_sf_vqmaccsu_4x8x4_i32m1(vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_4x8x4_i32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_sf_vqmaccsu_4x8x4_i32m2
+// CHECK-RV64-SAME: (<vscale x 4 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.sf.vqmaccsu.4x8x4.nxv4i32.nxv8i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 16 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_sf_vqmaccsu_4x8x4_i32m2(vint32m2_t vd, vint8m1_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_4x8x4_i32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_sf_vqmaccsu_4x8x4_i32m4
+// CHECK-RV64-SAME: (<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.sf.vqmaccsu.4x8x4.nxv8i32.nxv8i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 32 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_sf_vqmaccsu_4x8x4_i32m4(vint32m4_t vd, vint8m1_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_4x8x4_i32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_sf_vqmaccsu_4x8x4_i32m8
+// CHECK-RV64-SAME: (<vscale x 16 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.sf.vqmaccsu.4x8x4.nxv16i32.nxv8i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 8 x i8> [[VS1]], <vscale x 64 x i8> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_sf_vqmaccsu_4x8x4_i32m8(vint32m8_t vd, vint8m1_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_sf_vqmaccsu_4x8x4_i32m8(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccu_2x8x2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vqmaccu_2x8x2.c
new file mode 100644
index 000000000000000..9f887562e12b435
--- /dev/null
+++ b/...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
SiFive Int8 Matrix Multiplication Extensions Specification https://sifive.cdn.prismic.io/sifive/c4f0e51d-4dd3-402a-98bc-1ffad6011259_int8-matmul-spec.pdf
cfcaf76
to
a41af66
Compare
# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v,+xsfvqmaccqoq,+xsfvqmaccdod %s \ | ||
# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN | ||
|
||
sf.vqmaccu.2x8x2 v8, v4, v20 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand something, why aren't there any with v0.t
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The spec reserves masked for these instructions.
I'm not sure why we didn't define masked version of these extensions, do you know the reason @topperc @kito-cheng ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From intuition I would say the motivation to mask a matrix multiplication may not be strong? To split matrixes, tiling are done. So a masked version is probably not needed. (This is my own personal guess though. Not a math library expert here to back this up with experience.
|
LGTM. If no one else has any objections, I think we can merge it. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
SiFive Int8 Matrix Multiplication Extensions Specification
https://sifive.cdn.prismic.io/sifive/c4f0e51d-4dd3-402a-98bc-1ffad6011259_int8-matmul-spec.pdf