Skip to content

Commit b593bfd

Browse files
author
Francesco Petrogalli
committed
[clang][SveEmitter] SVE builtins for svusdot and svsudot ACLE.
Summary: Add the following intrinsics, guarded by `__ARM_FEATURE_SVE_MATMUL_INT8`: svusdot[_s32], svusdot[_n_s32], svusdot_lane[_s32], svsudot[_s32], svsudot[_n_s32], svsudot_lane[_s32]. Reviewers: sdesmalen, efriedma, david-arm, rengolin. Subscribers: tschuett, kristof.beyls, cfe-commits. Tags: #clang. Differential Revision: https://reviews.llvm.org/D79877
1 parent fff3a84 commit b593bfd

File tree

6 files changed

+130
-2
lines changed

6 files changed

+130
-2
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ namespace clang {
244244
bool isAppendSVALL() const { return Flags & IsAppendSVALL; }
245245
bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; }
246246
bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; }
247+
/// Returns true when the ReverseUSDOT bit is set in Flags. arm_sve.td
/// describes that flag as "Unsigned/signed operands must be swapped".
bool isReverseUSDOT() const { return Flags & ReverseUSDOT; }
247248

248249
uint64_t getBits() const { return Flags; }
249250
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
// a: scalar of element type (splat to vector type)
6969
// R: scalar of 1/2 width element type (splat to vector type)
7070
// r: scalar of 1/4 width element type (splat to vector type)
71+
// @: unsigned scalar of 1/4 width element type (splat to vector type)
7172
// e: 1/2 width unsigned elements, 2x element count
7273
// b: 1/4 width unsigned elements, 4x element count
7374
// h: 1/2 width elements, 2x element count
@@ -196,6 +197,7 @@ def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the s
196197
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
197198
def IsGatherPrefetch : FlagType<0x10000000>;
198199
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
200+
def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
199201

200202
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
201203
class ImmCheckType<int val> {
@@ -1240,6 +1242,14 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_INT8)" in {
12401242
def SVMLLA_S32 : SInst<"svmmla[_s32]", "ddqq","i", MergeNone, "aarch64_sve_smmla">;
12411243
def SVMLLA_U32 : SInst<"svmmla[_u32]", "ddqq","Ui", MergeNone, "aarch64_sve_ummla">;
12421244
def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i", MergeNone, "aarch64_sve_usmmla">;
1245+
1246+
// svusdot maps directly onto aarch64_sve_usdot. svsudot reuses the same
// intrinsic and sets ReverseUSDOT, which marks the unsigned/signed operands
// as needing to be swapped when the builtin is emitted.
// The _n variants take a scalar last operand that is splat to a vector:
// 'r' for svusdot, '@' (the unsigned 1/4 width scalar) for svsudot.
def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot">;
1247+
def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot">;
1248+
def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>;
1249+
def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>;
1250+
1251+
// The lane forms map to separate usdot_lane / sudot_lane intrinsics, so no
// ReverseUSDOT flag is set on either of them.
// ImmCheck<3, ImmCheckLaneIndexDot, 2> attaches an immediate check to operand 3
// (the lane index); presumably it limits the index to the valid dot-product
// lane range -- TODO confirm against the ImmCheckLaneIndexDot definition.
def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
1252+
def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
12431253
}
12441254

12451255
let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP32)" in {

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8040,6 +8040,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
80408040
if (TypeFlags.isReverseCompare())
80418041
std::swap(Ops[1], Ops[2]);
80428042

8043+
if (TypeFlags.isReverseUSDOT())
8044+
std::swap(Ops[1], Ops[2]);
8045+
80438046
// Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
80448047
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
80458048
llvm::Type *OpndTy = Ops[1]->getType();
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef SVE_OVERLOADED_FORMS
7+
// A simple (used, unused, used, unused) macro: the overloaded form pastes only A1 and A3; four slots are long enough to represent any SVE builtin.
8+
#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
9+
#else
10+
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
11+
#endif
12+
13+
// svsudot takes (signed y, unsigned z). The expected IR calls the usdot
// intrinsic with the two i8 operands swapped: %z first, then %y.
svint32_t test_svsudot_s32(svint32_t x, svint8_t y, svuint8_t z) {
14+
// CHECK-LABEL: test_svsudot_s32
15+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %z, <vscale x 16 x i8> %y)
16+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
17+
return SVE_ACLE_FUNC(svsudot, _s32, , )(x, y, z);
18+
}
19+
20+
// The _n form takes a scalar uint8_t z. The expected IR splats it with
// sve.dup.x and passes the splat as the first i8 operand of usdot, ahead of %y.
svint32_t test_svsudot_n_s32(svint32_t x, svint8_t y, uint8_t z) {
21+
// CHECK-LABEL: test_svsudot_n_s32
22+
// CHECK: %[[SPLAT:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %z)
23+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %[[SPLAT]], <vscale x 16 x i8> %y)
24+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
25+
return SVE_ACLE_FUNC(svsudot, _n_s32, , )(x, y, z);
26+
}
27+
28+
// Lane form, index 0: the expected IR calls sudot.lane with x, y, z in source
// order (no operand swap) and the lane index passed as i32 0.
svint32_t test_svsudot_lane_s32_0(svint32_t x, svint8_t y, svuint8_t z) {
29+
// CHECK-LABEL: test_svsudot_lane_s32_0
30+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 0)
31+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
32+
return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 0);
33+
}
34+
35+
// Lane form, index 1: the expected IR calls sudot.lane with x, y, z in source
// order (no operand swap) and the lane index passed as i32 1.
svint32_t test_svsudot_lane_s32_1(svint32_t x, svint8_t y, svuint8_t z) {
36+
// CHECK-LABEL: test_svsudot_lane_s32_1
37+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 1)
38+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
39+
return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 1);
40+
}
41+
42+
// Lane form, index 2: the expected IR calls sudot.lane with x, y, z in source
// order (no operand swap) and the lane index passed as i32 2.
svint32_t test_svsudot_lane_s32_2(svint32_t x, svint8_t y, svuint8_t z) {
43+
// CHECK-LABEL: test_svsudot_lane_s32_2
44+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 2)
45+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
46+
return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 2);
47+
}
48+
49+
// Lane form, index 3 (the largest index exercised by this file): the expected
// IR calls sudot.lane with x, y, z in source order and the lane passed as i32 3.
svint32_t test_svsudot_lane_s32_3(svint32_t x, svint8_t y, svuint8_t z) {
50+
// CHECK-LABEL: test_svsudot_lane_s32_3
51+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 3)
52+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
53+
return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 3);
54+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef SVE_OVERLOADED_FORMS
7+
// A simple (used, unused, used, unused) macro: the overloaded form pastes only A1 and A3; four slots are long enough to represent any SVE builtin.
8+
#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
9+
#else
10+
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
11+
#endif
12+
13+
// svusdot takes (unsigned y, signed z). The expected IR passes them to the
// usdot intrinsic in source order: %y first, then %z.
svint32_t test_svusdot_s32(svint32_t x, svuint8_t y, svint8_t z) {
14+
// CHECK-LABEL: test_svusdot_s32
15+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z)
16+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
17+
return SVE_ACLE_FUNC(svusdot, _s32, , )(x, y, z);
18+
}
19+
20+
// The _n form takes a scalar int8_t z. The expected IR splats it with
// sve.dup.x and passes the splat as the last operand of usdot, after %y.
svint32_t test_svusdot_n_s32(svint32_t x, svuint8_t y, int8_t z) {
21+
// CHECK-LABEL: test_svusdot_n_s32
22+
// CHECK: %[[SPLAT:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %z)
23+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %[[SPLAT]])
24+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
25+
return SVE_ACLE_FUNC(svusdot, _n_s32, , )(x, y, z);
26+
}
27+
28+
// Lane form, index 0: the expected IR calls usdot.lane with x, y, z in source
// order and the lane index passed as i32 0.
svint32_t test_svusdot_lane_s32_0(svint32_t x, svuint8_t y, svint8_t z) {
29+
// CHECK-LABEL: test_svusdot_lane_s32_0
30+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 0)
31+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
32+
return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 0);
33+
}
34+
35+
// Lane form, index 1: the expected IR calls usdot.lane with x, y, z in source
// order and the lane index passed as i32 1.
svint32_t test_svusdot_lane_s32_1(svint32_t x, svuint8_t y, svint8_t z) {
36+
// CHECK-LABEL: test_svusdot_lane_s32_1
37+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 1)
38+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
39+
return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 1);
40+
}
41+
42+
// Lane form, index 2: the expected IR calls usdot.lane with x, y, z in source
// order and the lane index passed as i32 2.
svint32_t test_svusdot_lane_s32_2(svint32_t x, svuint8_t y, svint8_t z) {
43+
// CHECK-LABEL: test_svusdot_lane_s32_2
44+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 2)
45+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
46+
return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 2);
47+
}
48+
49+
// Lane form, index 3 (the largest index exercised by this file): the expected
// IR calls usdot.lane with x, y, z in source order and the lane passed as i32 3.
svint32_t test_svusdot_lane_s32_3(svint32_t x, svuint8_t y, svint8_t z) {
50+
// CHECK-LABEL: test_svusdot_lane_s32_3
51+
// CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 3)
52+
// CHECK: ret <vscale x 4 x i32> %[[RET]]
53+
return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 3);
54+
}

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,13 +213,13 @@ class Intrinsic {
213213
/// Return true if the intrinsic takes a splat operand.
214214
bool hasSplat() const {
215215
// These prototype modifiers are described in arm_sve.td.
216-
return Proto.find_first_of("ajfrKLR") != std::string::npos;
216+
return Proto.find_first_of("ajfrKLR@") != std::string::npos;
217217
}
218218

219219
/// Return the parameter index of the splat operand.
220220
unsigned getSplatIdx() const {
221221
// These prototype modifiers are described in arm_sve.td.
222-
auto Idx = Proto.find_first_of("ajfrKLR");
222+
auto Idx = Proto.find_first_of("ajfrKLR@");
223223
assert(Idx != std::string::npos && Idx > 0 &&
224224
"Prototype has no splat operand");
225225
return Idx - 1;
@@ -541,6 +541,12 @@ void SVEType::applyModifier(char Mod) {
541541
ElementBitwidth /= 4;
542542
NumVectors = 0;
543543
break;
544+
case '@':
545+
Signed = false;
546+
Float = false;
547+
ElementBitwidth /= 4;
548+
NumVectors = 0;
549+
break;
544550
case 'K':
545551
Signed = true;
546552
Float = false;

0 commit comments

Comments
 (0)