Skip to content

Commit f79676a

Browse files
[SVE2.1][Clang][LLVM]Add BFloat16 builtin in Clang and LLVM intrinisc (#70362)
This patch implements the builtins in Clang and the LLVM-IR intrinsic for the following: For BFADD , BFSUB, BFMAX, BFMIN, BFMAXNM and BFMINNM, for instance: svbfloat16_t svadd[_bf16]_m (svbool_t pg, svbfloat16_t zdn, svbfloat16_t zm); svbfloat16_t svadd[_bf16]_x (svbool_t pg, svbfloat16_t zdn, svbfloat16_t zm); svbfloat16_t svadd[_bf16]_z (svbool_t pg, svbfloat16_t zdn, svbfloat16_t zm); svbfloat16_t svadd[_n_bf16]_m (svbool_t pg, svbfloat16_t zdn, bfloat16_t zm); svbfloat16_t svadd[_n_bf16]_x (svbool_t pg, svbfloat16_t zdn, bfloat16_t zm); svbfloat16_t svadd[_n_bf16]_z (svbool_t pg, svbfloat16_t zdn, bfloat16_t zm); the add, could be replaced by sub, max, min, maxnm and minnm. For BFMUL: svbfloat16_t svmul[_bf16]_m(svbool_t pg, svbfloat16_t zdn, svbfloat16_t zm); svbfloat16_t svmul[_bf16]_x(svbool_t pg, svbfloat16_t zdn, svbfloat16_t zm); svbfloat16_t svmul[_bf16]_z(svbool_t pg, svbfloat16_t zdn, svbfloat16_t zm); svbfloat16_t svmul[_n_bf16]_m(svbool_t pg, svbfloat16_t zdn, bfloat16_t zm); svbfloat16_t svmul[_n_bf16]_x(svbool_t pg, svbfloat16_t zdn, bfloat16_t zm); svbfloat16_t svmul[_n_bf16]_z(svbool_t pg, svbfloat16_t zdn, bfloat16_t zm); svbfloat16_t svmul_lane[_bf16](svbfloat16_t zn, svbfloat16_t zm, uint64_t imm_idx); For BFCLAMP: svbfloat16_t svclamp[_bf16](svbfloat16_t op, svbfloat16_t min, svbfloat16_t max); For BFMLA and BFMLS svbfloat16_t svmla[_bf16]_m(svbool_t pg, svbfloat16_t zda, svbfloat16_t zn, svbfloat16_t zm); svbfloat16_t svmla[_bf16]_z(svbool_t pg, svbfloat16_t zda, svbfloat16_t zn, svbfloat16_t zm); svbfloat16_t svmla[_bf16]_x(svbool_t pg, svbfloat16_t zda, svbfloat16_t zn, svbfloat16_t zm); svbfloat16_t svmla[_n_bf16]_m(svbool_t pg, svbfloat16_t zda, svbfloat16_t zn, bfloat16_t zm); svbfloat16_t svmla[_n_bf16]_z(svbool_t pg, svbfloat16_t zda, svbfloat16_t zn, bfloat16_t zm); svbfloat16_t svmla[_n_bf16]_x(svbool_t pg, svbfloat16_t zda, svbfloat16_t zn, bfloat16_t zm); svbfloat16_t svmla_lane[_bf16](svbfloat16_t zda, svbfloat16_t zn, svbfloat16_t zm, uint64_t According to the PR#257[1] [1]ARM-software/acle#257 Co-authored-by: Matthew Devereau <[email protected]>
1 parent ad86d3e commit f79676a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+2332
-137
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1981,7 +1981,23 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv
19811981
defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
19821982
}
19831983

1984-
////////////////////////////////////////////////////////////////////////////////
1984+
1985+
let TargetGuard = "sve2p1,b16b16" in {
1986+
defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">;
1987+
defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">;
1988+
defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">;
1989+
defm SVMAXNM_BF : SInstZPZZ<"svmaxnm","b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u">;
1990+
defm SVMINNM_BF : SInstZPZZ<"svminnm","b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u">;
1991+
defm SVMAX_BF : SInstZPZZ<"svmax", "b", "aarch64_sve_fmax", "aarch64_sve_fmax_u">;
1992+
defm SVMIN_BF : SInstZPZZ<"svmin", "b", "aarch64_sve_fmin", "aarch64_sve_fmin_u">;
1993+
defm SVMLA_BF : SInstZPZZZ<"svmla", "b", "aarch64_sve_fmla", "aarch64_sve_fmla_u", []>;
1994+
defm SVMLS_BF : SInstZPZZZ<"svmls", "b", "aarch64_sve_fmls", "aarch64_sve_fmls_u", []>;
1995+
def SVMLA_LANE_BF : SInst<"svmla_lane[_{d}]", "ddddi", "b", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
1996+
def SVMLS_LANE_BF : SInst<"svmls_lane[_{d}]", "ddddi", "b", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
1997+
def SVMUL_LANE_BF : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
1998+
def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_fclamp", [], []>;
1999+
} //sve2p1,b16b16
2000+
19852001
// SME2
19862002

19872003
// SME intrinsics which operate only on vectors and do not require ZA should be added here,
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
#include <arm_sve.h>
9+
10+
#ifdef SVE_OVERLOADED_FORMS
11+
// A simple used,unused... macro, long enough to represent any SVE builtin.
12+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
13+
#else
14+
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
15+
#endif
16+
17+
// CHECK-LABEL: @test_svadd_bf16_m(
18+
// CHECK-NEXT: entry:
19+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
20+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
21+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
22+
//
23+
// CPP-CHECK-LABEL: @_Z17test_svadd_bf16_mu10__SVBool_tu14__SVBfloat16_tS0_(
24+
// CPP-CHECK-NEXT: entry:
25+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
26+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
27+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
28+
//
29+
svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
30+
{
31+
return SVE_ACLE_FUNC(svadd, _bf16, _m)(pg, op1, op2);
32+
}
33+
34+
// CHECK-LABEL: @test_svadd_bf16_z(
35+
// CHECK-NEXT: entry:
36+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
37+
// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> zeroinitializer
38+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
39+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
40+
//
41+
// CPP-CHECK-LABEL: @_Z17test_svadd_bf16_zu10__SVBool_tu14__SVBfloat16_tS0_(
42+
// CPP-CHECK-NEXT: entry:
43+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> zeroinitializer
45+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
46+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
47+
//
48+
svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
49+
{
50+
return SVE_ACLE_FUNC(svadd, _bf16, _z)(pg, op1, op2);
51+
}
52+
53+
// CHECK-LABEL: @test_svadd_bf16_x(
54+
// CHECK-NEXT: entry:
55+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
56+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
57+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
58+
//
59+
// CPP-CHECK-LABEL: @_Z17test_svadd_bf16_xu10__SVBool_tu14__SVBfloat16_tS0_(
60+
// CPP-CHECK-NEXT: entry:
61+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
62+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
63+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
64+
//
65+
svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
66+
{
67+
return SVE_ACLE_FUNC(svadd, _bf16, _x)(pg, op1, op2);
68+
}
69+
70+
// CHECK-LABEL: @test_svadd_bf16_n_m(
71+
// CHECK-NEXT: entry:
72+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
73+
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x bfloat> poison, bfloat [[OP2:%.*]], i64 0
74+
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
75+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
76+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
77+
//
78+
// CPP-CHECK-LABEL: @_Z19test_svadd_bf16_n_mu10__SVBool_tu14__SVBfloat16_tu6__bf16(
79+
// CPP-CHECK-NEXT: entry:
80+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
81+
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x bfloat> poison, bfloat [[OP2:%.*]], i64 0
82+
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
83+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
84+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
85+
//
86+
svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
87+
{
88+
return SVE_ACLE_FUNC(svadd, _n_bf16, _m)(pg, op1, op2);
89+
}
90+
91+
// CHECK-LABEL: @test_svadd_bf16_n_z(
92+
// CHECK-NEXT: entry:
93+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
94+
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x bfloat> poison, bfloat [[OP2:%.*]], i64 0
95+
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
96+
// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> zeroinitializer
97+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
98+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
99+
//
100+
// CPP-CHECK-LABEL: @_Z19test_svadd_bf16_n_zu10__SVBool_tu14__SVBfloat16_tu6__bf16(
101+
// CPP-CHECK-NEXT: entry:
102+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
103+
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x bfloat> poison, bfloat [[OP2:%.*]], i64 0
104+
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
105+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> zeroinitializer
106+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
107+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
108+
//
109+
svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
110+
{
111+
return SVE_ACLE_FUNC(svadd, _n_bf16, _z)(pg, op1, op2);
112+
}
113+
114+
// CHECK-LABEL: @test_svadd_bf16_n_x(
115+
// CHECK-NEXT: entry:
116+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
117+
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x bfloat> poison, bfloat [[OP2:%.*]], i64 0
118+
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
119+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
120+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
121+
//
122+
// CPP-CHECK-LABEL: @_Z19test_svadd_bf16_n_xu10__SVBool_tu14__SVBfloat16_tu6__bf16(
123+
// CPP-CHECK-NEXT: entry:
124+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
125+
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x bfloat> poison, bfloat [[OP2:%.*]], i64 0
126+
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
127+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
128+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
129+
//
130+
svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
131+
{
132+
return SVE_ACLE_FUNC(svadd, _n_bf16, _x)(pg, op1, op2);
133+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
#include <arm_sve.h>
9+
10+
#ifdef SVE_OVERLOADED_FORMS
11+
// A simple used,unused... macro, long enough to represent any SVE builtin.
12+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
13+
#else
14+
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
15+
#endif
16+
17+
// CHECK-LABEL: @test_svclamp_bf16(
18+
// CHECK-NEXT: entry:
19+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fclamp.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
20+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
21+
//
22+
// CPP-CHECK-LABEL: @_Z17test_svclamp_bf16u14__SVBfloat16_tS_S_(
23+
// CPP-CHECK-NEXT: entry:
24+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fclamp.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
25+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
26+
//
27+
svbfloat16_t test_svclamp_bf16(svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
28+
{
29+
return SVE_ACLE_FUNC(svclamp, _bf16,)(op1, op2, op3);
30+
}
31+

0 commit comments

Comments
 (0)