Skip to content

Commit a0a3c79

Browse files
authored
[Clang][SME] Warn when a function doesn't have ZA state (llvm#75805)
This patch adds a warning that's emitted when a builtin call uses ZA state but the calling function doesn't provide any. Patch by David Sherwood <[email protected]>.
1 parent 4d9d105 commit a0a3c79

24 files changed, with 371 additions and 297 deletions.

clang/include/clang/Basic/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,9 @@ clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks
103103
clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs
104104
SOURCE arm_sme.td
105105
TARGET ClangARMSmeStreamingAttrs)
106+
clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state
107+
SOURCE arm_sme.td
108+
TARGET ClangARMSmeBuiltinsZAState)
106109
clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
107110
SOURCE arm_cde.td
108111
TARGET ClangARMCdeBuiltinsDef)

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3155,6 +3155,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error<
31553155
def warn_attribute_arm_sm_incompat_builtin : Warning<
31563156
"builtin call has undefined behaviour when called from a %0 function">,
31573157
InGroup<DiagGroup<"undefined-arm-streaming">>;
3158+
def warn_attribute_arm_za_builtin_no_za_state : Warning<
3159+
"builtin call is not valid when calling from a function without active ZA state">,
3160+
InGroup<DiagGroup<"undefined-arm-za">>;
31583161
def err_sve_vector_in_non_sve_target : Error<
31593162
"SVE vector type %0 cannot be used in a target without sve">;
31603163
def err_attribute_riscv_rvv_bits_unsupported : Error<

clang/lib/Sema/SemaChecking.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3174,6 +3174,25 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
31743174
}
31753175
}
31763176

3177+
static bool hasSMEZAState(const FunctionDecl *FD) {
3178+
if (FD->hasAttr<ArmNewZAAttr>())
3179+
return true;
3180+
if (const auto *T = FD->getType()->getAs<FunctionProtoType>())
3181+
if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
3182+
return true;
3183+
return false;
3184+
}
3185+
3186+
static bool hasSMEZAState(unsigned BuiltinID) {
3187+
switch (BuiltinID) {
3188+
default:
3189+
return false;
3190+
#define GET_SME_BUILTIN_HAS_ZA_STATE
3191+
#include "clang/Basic/arm_sme_builtins_za_state.inc"
3192+
#undef GET_SME_BUILTIN_HAS_ZA_STATE
3193+
}
3194+
}
3195+
31773196
bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
31783197
if (const FunctionDecl *FD = getCurFunctionDecl()) {
31793198
std::optional<ArmStreamingType> BuiltinType;
@@ -3186,6 +3205,11 @@ bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
31863205

31873206
if (BuiltinType)
31883207
checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType);
3208+
3209+
if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD))
3210+
Diag(TheCall->getBeginLoc(),
3211+
diag::warn_attribute_arm_za_builtin_no_za_state)
3212+
<< TheCall->getSourceRange();
31893213
}
31903214

31913215
// Range check SME intrinsics that take immediate values.

clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
3131
// CHECK-CXX-NEXT: ret void
3232
//
33-
void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
33+
void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
3434
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn);
3535
}
3636

@@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream
5050
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
5151
// CHECK-CXX-NEXT: ret void
5252
//
53-
void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
53+
void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
5454
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn);
5555
}
5656

@@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre
7070
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
7171
// CHECK-CXX-NEXT: ret void
7272
//
73-
void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
73+
void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
7474
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn);
7575
}
7676

@@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami
9090
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
9191
// CHECK-CXX-NEXT: ret void
9292
//
93-
void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
93+
void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
9494
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn);
9595
}
9696

@@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_strea
110110
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
111111
// CHECK-CXX-NEXT: ret void
112112
//
113-
void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
113+
void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
114114
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn);
115115
}
116116

@@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream
130130
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
131131
// CHECK-CXX-NEXT: ret void
132132
//
133-
void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
133+
void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za {
134134
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn);
135135
}
136136

@@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre
150150
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
151151
// CHECK-CXX-NEXT: ret void
152152
//
153-
void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
153+
void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
154154
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn);
155155
}
156156

@@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami
170170
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
171171
// CHECK-CXX-NEXT: ret void
172172
//
173-
void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
173+
void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za {
174174
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn);
175175
}
176176
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
3131
// CHECK-CXX-NEXT: ret void
3232
//
33-
void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
33+
void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
3434
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn);
3535
}
3636

@@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream
5050
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
5151
// CHECK-CXX-NEXT: ret void
5252
//
53-
void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
53+
void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
5454
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn);
5555
}
5656

@@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre
7070
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
7171
// CHECK-CXX-NEXT: ret void
7272
//
73-
void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
73+
void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
7474
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn);
7575
}
7676

@@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami
9090
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
9191
// CHECK-CXX-NEXT: ret void
9292
//
93-
void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
93+
void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
9494
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn);
9595
}
9696

@@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_strea
110110
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
111111
// CHECK-CXX-NEXT: ret void
112112
//
113-
void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
113+
void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
114114
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn);
115115
}
116116

@@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream
130130
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
131131
// CHECK-CXX-NEXT: ret void
132132
//
133-
void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
133+
void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za {
134134
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn);
135135
}
136136

@@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre
150150
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
151151
// CHECK-CXX-NEXT: ret void
152152
//
153-
void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
153+
void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
154154
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn);
155155
}
156156

@@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami
170170
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
171171
// CHECK-CXX-NEXT: ret void
172172
//
173-
void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
173+
void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za {
174174
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn);
175175
}
176176
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
2323
// CHECK-CXX-NEXT: ret void
2424
//
25-
void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
25+
void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
2626
svld1_hor_za8(0, slice_base, pg, ptr);
2727
svld1_hor_za8(0, slice_base + 15, pg, ptr);
2828
}
@@ -45,7 +45,7 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm
4545
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
4646
// CHECK-CXX-NEXT: ret void
4747
//
48-
void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
48+
void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
4949
svld1_hor_za16(0, slice_base, pg, ptr);
5050
svld1_hor_za16(1, slice_base + 7, pg, ptr);
5151
}
@@ -68,7 +68,7 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
6868
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
6969
// CHECK-CXX-NEXT: ret void
7070
//
71-
void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
71+
void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
7272
svld1_hor_za32(0, slice_base, pg, ptr);
7373
svld1_hor_za32(3, slice_base + 3, pg, ptr);
7474
}
@@ -91,7 +91,7 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
9191
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
9292
// CHECK-CXX-NEXT: ret void
9393
//
94-
void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
94+
void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
9595
svld1_hor_za64(0, slice_base, pg, ptr);
9696
svld1_hor_za64(7, slice_base + 1, pg, ptr);
9797
}
@@ -112,7 +112,7 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
112112
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
113113
// CHECK-CXX-NEXT: ret void
114114
//
115-
void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
115+
void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
116116
svld1_hor_za128(0, slice_base, pg, ptr);
117117
svld1_hor_za128(15, slice_base, pg, ptr);
118118
}
@@ -133,7 +133,7 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a
133133
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
134134
// CHECK-CXX-NEXT: ret void
135135
//
136-
void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
136+
void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
137137
svld1_ver_za8(0, slice_base, pg, ptr);
138138
svld1_ver_za8(0, slice_base + 15, pg, ptr);
139139
}
@@ -156,7 +156,7 @@ void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm
156156
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
157157
// CHECK-CXX-NEXT: ret void
158158
//
159-
void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
159+
void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
160160
svld1_ver_za16(0, slice_base, pg, ptr);
161161
svld1_ver_za16(1, slice_base + 7, pg, ptr);
162162
}
@@ -179,7 +179,7 @@ void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
179179
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
180180
// CHECK-CXX-NEXT: ret void
181181
//
182-
void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
182+
void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
183183
svld1_ver_za32(0, slice_base, pg, ptr);
184184
svld1_ver_za32(3, slice_base + 3, pg, ptr);
185185
}
@@ -202,7 +202,7 @@ void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
202202
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
203203
// CHECK-CXX-NEXT: ret void
204204
//
205-
void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
205+
void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
206206
svld1_ver_za64(0, slice_base, pg, ptr);
207207
svld1_ver_za64(7, slice_base + 1, pg, ptr);
208208
}
@@ -223,7 +223,7 @@ void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar
223223
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
224224
// CHECK-CXX-NEXT: ret void
225225
//
226-
void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
226+
void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za {
227227
svld1_ver_za128(0, slice_base, pg, ptr);
228228
svld1_ver_za128(15, slice_base, pg, ptr);
229229
}

0 commit comments

Comments
 (0)