Skip to content

Commit 9917f3c

Browse files
[Clang][AArch64] Require SVE or SSVE for scalable types. (#91356)
Scalable types are only available when:
* the function is compiled with +sve, or
* the function is compiled with +sme and is executed in Streaming-SVE mode.
1 parent 932ca85 commit 9917f3c

19 files changed

+216
-125
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3209,6 +3209,8 @@ def warn_attribute_arm_zt0_builtin_no_zt0_state : Warning<
32093209
InGroup<DiagGroup<"undefined-arm-zt0">>;
32103210
def err_sve_vector_in_non_sve_target : Error<
32113211
"SVE vector type %0 cannot be used in a target without sve">;
3212+
def err_sve_vector_in_non_streaming_function : Error<
3213+
"SVE vector type %0 cannot be used in a non-streaming function">;
32123214
def err_attribute_riscv_rvv_bits_unsupported : Error<
32133215
"%0 is only supported when '-mrvv-vector-bits=<bits>' is specified with a "
32143216
"value of \"zvl\" or a power 2 in the range [64,65536]">;

clang/lib/Sema/Sema.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2054,9 +2054,15 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
20542054
if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) {
20552055
llvm::StringMap<bool> CallerFeatureMap;
20562056
Context.getFunctionFeatureMap(CallerFeatureMap, FD);
2057-
if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap) &&
2058-
!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
2059-
Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty;
2057+
if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) {
2058+
if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
2059+
Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty;
2060+
else if (!IsArmStreamingFunction(FD,
2061+
/*IncludeLocallyStreaming=*/true)) {
2062+
Diag(D->getLocation(), diag::err_sve_vector_in_non_streaming_function)
2063+
<< Ty;
2064+
}
2065+
}
20602066
}
20612067
};
20622068

clang/lib/Sema/SemaDecl.cpp

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8905,11 +8905,20 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
89058905
const FunctionDecl *FD = cast<FunctionDecl>(CurContext);
89068906
llvm::StringMap<bool> CallerFeatureMap;
89078907
Context.getFunctionFeatureMap(CallerFeatureMap, FD);
8908-
if (!Builtin::evaluateRequiredTargetFeatures(
8909-
"sve", CallerFeatureMap)) {
8910-
Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T;
8911-
NewVD->setInvalidDecl();
8912-
return;
8908+
8909+
if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) {
8910+
if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap)) {
8911+
Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T;
8912+
NewVD->setInvalidDecl();
8913+
return;
8914+
} else if (!IsArmStreamingFunction(FD,
8915+
/*IncludeLocallyStreaming=*/true)) {
8916+
Diag(NewVD->getLocation(),
8917+
diag::err_sve_vector_in_non_streaming_function)
8918+
<< T;
8919+
NewVD->setInvalidDecl();
8920+
return;
8921+
}
89138922
}
89148923
}
89158924

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,12 @@
99

1010
#include <arm_sme.h>
1111

12+
#if defined __ARM_FEATURE_SME
13+
#define MODE_ATTR __arm_streaming
14+
#else
15+
#define MODE_ATTR
16+
#endif
17+
1218
#ifdef SVE_OVERLOADED_FORMS
1319
// A simple used,unused... macro, long enough to represent any SVE builtin.
1420
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +32,7 @@
2632
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[CNT:%.*]])
2733
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
2834
//
29-
svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) __arm_streaming_compatible
35+
svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) MODE_ATTR
3036
{
3137
return SVE_ACLE_FUNC(svreinterpret,_b,,)(cnt);
3238
}
@@ -41,7 +47,7 @@ svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) __arm_streaming_compatib
4147
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[PG:%.*]])
4248
// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP0]]
4349
//
44-
svcount_t test_svreinterpret_svcnt_svbool(svbool_t pg) __arm_streaming_compatible
50+
svcount_t test_svreinterpret_svcnt_svbool(svbool_t pg) MODE_ATTR
4551
{
4652
return SVE_ACLE_FUNC(svreinterpret,_c,,)(pg);
4753
}

clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c

Lines changed: 42 additions & 36 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,12 @@
88
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
99
#include <arm_sve.h>
1010

11+
#if defined __ARM_FEATURE_SME
12+
#define MODE_ATTR __arm_streaming
13+
#else
14+
#define MODE_ATTR
15+
#endif
16+
1117
#ifdef SVE_OVERLOADED_FORMS
1218
// A simple used,unused... macro, long enough to represent any SVE builtin.
1319
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
@@ -27,7 +33,7 @@
2733
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
2834
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
2935
//
30-
svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
36+
svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
3137
{
3238
return SVE_ACLE_FUNC(svadd, _bf16, _m)(pg, op1, op2);
3339
}
@@ -46,7 +52,7 @@ svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
4652
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
4753
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
4854
//
49-
svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
55+
svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
5056
{
5157
return SVE_ACLE_FUNC(svadd, _bf16, _z)(pg, op1, op2);
5258
}
@@ -63,7 +69,7 @@ svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
6369
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
6470
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6571
//
66-
svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
72+
svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
6773
{
6874
return SVE_ACLE_FUNC(svadd, _bf16, _x)(pg, op1, op2);
6975
}
@@ -84,7 +90,7 @@ svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
8490
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
8591
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
8692
//
87-
svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
93+
svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
8894
{
8995
return SVE_ACLE_FUNC(svadd, _n_bf16, _m)(pg, op1, op2);
9096
}
@@ -107,7 +113,7 @@ svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
107113
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
108114
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
109115
//
110-
svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
116+
svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
111117
{
112118
return SVE_ACLE_FUNC(svadd, _n_bf16, _z)(pg, op1, op2);
113119
}
@@ -128,7 +134,7 @@ svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
128134
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
129135
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
130136
//
131-
svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
137+
svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
132138
{
133139
return SVE_ACLE_FUNC(svadd, _n_bf16, _x)(pg, op1, op2);
134140
}

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,12 @@
88
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
99
#include <arm_sve.h>
1010

11+
#if defined __ARM_FEATURE_SME
12+
#define MODE_ATTR __arm_streaming
13+
#else
14+
#define MODE_ATTR
15+
#endif
16+
1117
#ifdef SVE_OVERLOADED_FORMS
1218
// A simple used,unused... macro, long enough to represent any SVE builtin.
1319
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
@@ -27,7 +33,7 @@
2733
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
2834
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
2935
//
30-
svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
36+
svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
3137
{
3238
return SVE_ACLE_FUNC(svmax, _bf16, _m)(pg, op1, op2);
3339
}
@@ -46,7 +52,7 @@ svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
4652
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
4753
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
4854
//
49-
svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
55+
svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
5056
{
5157
return SVE_ACLE_FUNC(svmax, _bf16, _z)(pg, op1, op2);
5258
}
@@ -63,7 +69,7 @@ svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
6369
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
6470
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6571
//
66-
svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
72+
svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
6773
{
6874
return SVE_ACLE_FUNC(svmax, _bf16, _x)(pg, op1, op2);
6975
}
@@ -85,7 +91,7 @@ svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
8591
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
8692
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
8793
//
88-
svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
94+
svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
8995
{
9096
return SVE_ACLE_FUNC(svmax, _n_bf16, _m)(pg, op1, op2);
9197
}
@@ -108,7 +114,7 @@ svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
108114
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
109115
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
110116
//
111-
svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
117+
svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
112118
{
113119
return SVE_ACLE_FUNC(svmax, _n_bf16, _z)(pg, op1, op2);
114120
}
@@ -129,7 +135,7 @@ svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
129135
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
130136
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
131137
//
132-
svbfloat16_t test_svmax_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
138+
svbfloat16_t test_svmax_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
133139
{
134140
return SVE_ACLE_FUNC(svmax, _n_bf16, _x)(pg, op1, op2);
135141
}

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,12 @@
88
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
99
#include <arm_sve.h>
1010

11+
#if defined __ARM_FEATURE_SME
12+
#define MODE_ATTR __arm_streaming
13+
#else
14+
#define MODE_ATTR
15+
#endif
16+
1117
#ifdef SVE_OVERLOADED_FORMS
1218
// A simple used,unused... macro, long enough to represent any SVE builtin.
1319
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
@@ -27,7 +33,7 @@
2733
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
2834
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
2935
//
30-
svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
36+
svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
3137
{
3238
return SVE_ACLE_FUNC(svmaxnm, _bf16, _m)(pg, op1, op2);
3339
}
@@ -46,7 +52,7 @@ svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
4652
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
4753
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
4854
//
49-
svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
55+
svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
5056
{
5157
return SVE_ACLE_FUNC(svmaxnm, _bf16, _z)(pg, op1, op2);
5258
}
@@ -63,7 +69,7 @@ svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
6369
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
6470
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6571
//
66-
svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
72+
svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
6773
{
6874
return SVE_ACLE_FUNC(svmaxnm, _bf16, _x)(pg, op1, op2);
6975
}
@@ -85,7 +91,7 @@ svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
8591
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
8692
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
8793
//
88-
svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
94+
svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
8995
{
9096
return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _m)(pg, op1, op2);
9197
}
@@ -108,7 +114,7 @@ svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2
108114
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
109115
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
110116
//
111-
svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
117+
svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
112118
{
113119
return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _z)(pg, op1, op2);
114120
}
@@ -129,7 +135,7 @@ svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2
129135
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
130136
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
131137
//
132-
svbfloat16_t test_svmaxnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
138+
svbfloat16_t test_svmaxnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
133139
{
134140
return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _x)(pg, op1, op2);
135141
}

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,12 @@
88
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
99
#include <arm_sve.h>
1010

11+
#if defined __ARM_FEATURE_SME
12+
#define MODE_ATTR __arm_streaming
13+
#else
14+
#define MODE_ATTR
15+
#endif
16+
1117
#ifdef SVE_OVERLOADED_FORMS
1218
// A simple used,unused... macro, long enough to represent any SVE builtin.
1319
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
@@ -27,7 +33,7 @@
2733
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
2834
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
2935
//
30-
svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
36+
svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
3137
{
3238
return SVE_ACLE_FUNC(svmin, _bf16, _m)(pg, op1, op2);
3339
}
@@ -46,7 +52,7 @@ svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
4652
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
4753
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
4854
//
49-
svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
55+
svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
5056
{
5157
return SVE_ACLE_FUNC(svmin, _bf16, _z)(pg, op1, op2);
5258
}
@@ -63,7 +69,7 @@ svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
6369
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
6470
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6571
//
66-
svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
72+
svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
6773
{
6874
return SVE_ACLE_FUNC(svmin, _bf16, _x)(pg, op1, op2);
6975
}
@@ -85,7 +91,7 @@ svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
8591
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
8692
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
8793
//
88-
svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
94+
svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
8995
{
9096
return SVE_ACLE_FUNC(svmin, _n_bf16, _m)(pg, op1, op2);
9197
}
@@ -108,7 +114,7 @@ svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
108114
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
109115
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
110116
//
111-
svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
117+
svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
112118
{
113119
return SVE_ACLE_FUNC(svmin, _n_bf16, _z)(pg, op1, op2);
114120
}
@@ -129,7 +135,7 @@ svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
129135
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
130136
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
131137
//
132-
svbfloat16_t test_svmin_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
138+
svbfloat16_t test_svmin_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR
133139
{
134140
return SVE_ACLE_FUNC(svmin, _n_bf16, _x)(pg, op1, op2);
135141
}

0 commit comments

Comments
 (0)