[RISCV] Implement intrinsics for XAndesVDot #141441

Merged 1 commit on May 31, 2025.
21 changes: 21 additions & 0 deletions clang/include/clang/Basic/riscv_andes_vector.td
@@ -81,3 +81,24 @@ let ManualCodegen = [{
defm nds_vfpmadb : RVVFPMAD;
}
}

// Andes Vector Dot Product Extension (XAndesVDot)

multiclass RVVD4DOT<list<list<string>> i_suffixes_prototypes,
list<list<string>> l_suffixes_prototypes> {
let RequiredFeatures = ["Xandesvdot"],
UnMaskedPolicyScheme = HasPolicyOperand,
HasMaskedOffOperand = false,
Log2LMUL = [-1, 0, 1, 2, 3],
OverloadedName = NAME in {
defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "i", i_suffixes_prototypes>;
defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "l", l_suffixes_prototypes>;
}
}

defm nds_vd4dots : RVVD4DOT<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)v"]],
[["vv", "v", "vv(FixedSEW:16)v(FixedSEW:16)v"]]>;
defm nds_vd4dotu : RVVD4DOT<[["vv", "Uv", "UvUv(FixedSEW:8)Uv(FixedSEW:8)Uv"]],
[["vv", "Uv", "UvUv(FixedSEW:16)Uv(FixedSEW:16)Uv"]]>;
defm nds_vd4dotsu : RVVD4DOT<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)Uv"]],
[["vv", "v", "vv(FixedSEW:16)v(FixedSEW:16)Uv"]]>;
1 change: 1 addition & 0 deletions clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -489,6 +489,7 @@ class RVVIntrinsic {
enum RVVRequire {
RVV_REQ_RV64,
RVV_REQ_Zvfhmin,
RVV_REQ_Xandesvdot,
RVV_REQ_Xandesvpackfph,
RVV_REQ_Xsfvcp,
RVV_REQ_Xsfvfnrclipxfqf,
1 change: 1 addition & 0 deletions clang/lib/Sema/SemaRISCV.cpp
@@ -225,6 +225,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
const TargetInfo &TI = Context.getTargetInfo();
static const std::pair<const char *, unsigned> FeatureCheckList[] = {
{"64bit", RVV_REQ_RV64},
{"xandesvdot", RVV_REQ_Xandesvdot},
{"xandesvpackfph", RVV_REQ_Xandesvpackfph},
{"xsfvcp", RVV_REQ_Xsfvcp},
{"xsfvfnrclipxfqf", RVV_REQ_Xsfvfnrclipxfqf},
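FeatureCheckList pairs each target-feature name with its requirement bit, and an intrinsic record is only registered when every bit it requires is present in the mask built from the enabled features. A minimal sketch of that gating, with assumed names (the real loop lives in ConstructRVVIntrinsics):

#include <cstdint>

// Sketch only: the requirement-bit idea above, with hypothetical names.
static bool intrinsicIsAvailable(uint32_t RequiredFeatures,
                                 uint32_t EnabledFeatures) {
  // Every bit an intrinsic requires must be set in the enabled-feature mask.
  return (RequiredFeatures & EnabledFeatures) == RequiredFeatures;
}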
1 change: 1 addition & 0 deletions clang/lib/Support/RISCVVIntrinsicUtils.cpp
@@ -1214,6 +1214,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum RVVRequire Require) {
switch (Require) {
STRINGIFY(RVV_REQ_RV64)
STRINGIFY(RVV_REQ_Zvfhmin)
STRINGIFY(RVV_REQ_Xandesvdot)
STRINGIFY(RVV_REQ_Xandesvpackfph)
STRINGIFY(RVV_REQ_Xsfvcp)
STRINGIFY(RVV_REQ_Xsfvfnrclipxfqf)
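For context, STRINGIFY here is presumably the usual enumerator-to-name helper; a hedged sketch of the pattern (the exact macro in RISCVVIntrinsicUtils.cpp may differ):

// Expand an enumerator into a switch case that streams its own spelling.
#define STRINGIFY(X)                                                           \
  case X:                                                                      \
    OS << #X;                                                                  \
    break;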
170 changes: 170 additions & 0 deletions (new test file; path not shown)
@@ -0,0 +1,170 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: riscv-registered-target
// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
// RUN: -target-feature +xandesvdot -disable-O0-optnone \
// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
// RUN: FileCheck --check-prefix=CHECK-RV64 %s

#include <andes_vector.h>
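// Note: the trailing `i64 3` in each intrinsic call below is the vector
// policy operand (tail-agnostic, mask-agnostic), present because the
// builtin definitions above use UnMaskedPolicyScheme = HasPolicyOperand.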

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
vint32mf2_t test_nds_vd4dots_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32mf2(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
vint32m1_t test_nds_vd4dots_vv_i32m1(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m1(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
vint32m2_t test_nds_vd4dots_vv_i32m2(vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m2(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
vint32m4_t test_nds_vd4dots_vv_i32m4(vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m4(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
vint32m8_t test_nds_vd4dots_vv_i32m8(vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m8(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
vint64m1_t test_nds_vd4dots_vv_i64m1(vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m1(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
vint64m2_t test_nds_vd4dots_vv_i64m2(vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m2(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
vint64m4_t test_nds_vd4dots_vv_i64m4(vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m4(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
vint64m8_t test_nds_vd4dots_vv_i64m8(vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m8(vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
vint32mf2_t test_nds_vd4dots_vv_i32mf2_m(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32mf2_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
//
vint32m1_t test_nds_vd4dots_vv_i32m1_m(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m1_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
vint32m2_t test_nds_vd4dots_vv_i32m2_m(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m2_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
//
vint32m4_t test_nds_vd4dots_vv_i32m4_m(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m4_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
//
vint32m8_t test_nds_vd4dots_vv_i32m8_m(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i32m8_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
//
vint64m1_t test_nds_vd4dots_vv_i64m1_m(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m1_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
vint64m2_t test_nds_vd4dots_vv_i64m2_m(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m2_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
//
vint64m4_t test_nds_vd4dots_vv_i64m4_m(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m4_m(mask, vd, vs1, vs2, vl);
}

// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_m(
// CHECK-RV64-NEXT: entry:
// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
//
vint64m8_t test_nds_vd4dots_vv_i64m8_m(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
return __riscv_nds_vd4dots_vv_i64m8_m(mask, vd, vs1, vs2, vl);
}
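As a usage sketch, one strip-mined step of a packed 8-bit dot product might look like the following. This is hypothetical driver code, not from the patch: it assumes vl counts 32-bit accumulator lanes with four 8-bit source elements feeding each one, and that the standard RVV load intrinsics are available alongside andes_vector.h.

#include <andes_vector.h>
#include <stddef.h>
#include <stdint.h>

// Hypothetical: accumulate one vector's worth of 4x8-bit dot products.
vint32m1_t dot_step(vint32m1_t acc, const int8_t *a, const int8_t *b,
                    size_t vl) {
  vint8m1_t va = __riscv_vle8_v_i8m1(a, vl * 4); // four bytes per 32-bit lane
  vint8m1_t vb = __riscv_vle8_v_i8m1(b, vl * 4);
  return __riscv_nds_vd4dots_vv_i32m1(acc, va, vb, vl);
}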