Skip to content

Commit 8186e15

Browse files
authored
[SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (#73304)
See ARM-software/acle#217. Patch by: Hassnaa Hamdi <[email protected]>
1 parent 1f283a6 commit 8186e15

File tree

9 files changed

+517
-16
lines changed

9 files changed

+517
-16
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,14 @@ let TargetGuard = "sme2" in {
338338
def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>;
339339
}
340340

341+
//
342+
// lookup table expand one register
343+
//
344+
let TargetGuard = "sme2" in {
// Single-register LUTI2/LUTI4 builtins, guarded by the "sme2" target feature.
// Both defs check immediate operand 0 with ImmCheck0_0; operand 2 is checked
// with ImmCheck0_15 for LUTI2 and ImmCheck0_7 for LUTI4.
345+
def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
346+
def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
347+
}
348+
341349
//
342350
// lookup table expand two contiguous registers
343351
//
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
9+
#include <arm_sme_draft_spec_subject_to_change.h>
10+
11+
12+
// CHECK-LABEL: @test_svluti2_lane_zt_u8(
13+
// CHECK-NEXT: entry:
14+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
15+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
16+
//
17+
// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t(
18+
// CPP-CHECK-NEXT: entry:
19+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
20+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
21+
//
22+
svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 15 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti2.lane.zt.nxv16i8 call.
23+
return svluti2_lane_zt_u8(0, zn, 15);
24+
}
25+
26+
27+
// CHECK-LABEL: @test_svluti2_lane_zt_s8(
28+
// CHECK-NEXT: entry:
29+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
30+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
31+
//
32+
// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t(
33+
// CPP-CHECK-NEXT: entry:
34+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
35+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
36+
//
37+
svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Signed 8-bit result variant; the CHECK lines above expect the same
// @llvm.aarch64.sme.luti2.lane.zt.nxv16i8 call as the unsigned test.
38+
return svluti2_lane_zt_s8(0, zn, 15);
39+
}
40+
41+
// CHECK-LABEL: @test_svluti2_lane_zt_u16(
42+
// CHECK-NEXT: entry:
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
44+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
45+
//
46+
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t(
47+
// CPP-CHECK-NEXT: entry:
48+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
49+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
50+
//
51+
svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 15 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti2.lane.zt.nxv8i16 call.
52+
return svluti2_lane_zt_u16(0, zn, 15);
53+
}
54+
55+
56+
// CHECK-LABEL: @test_svluti2_lane_zt_s16(
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
59+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
60+
//
61+
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t(
62+
// CPP-CHECK-NEXT: entry:
63+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
64+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
65+
//
66+
svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Signed 16-bit result variant; the CHECK lines above expect the same
// @llvm.aarch64.sme.luti2.lane.zt.nxv8i16 call as the unsigned test.
67+
return svluti2_lane_zt_s16(0, zn, 15);
68+
}
69+
70+
// CHECK-LABEL: @test_svluti2_lane_zt_f16(
71+
// CHECK-NEXT: entry:
72+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti2.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
73+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
74+
//
75+
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t(
76+
// CPP-CHECK-NEXT: entry:
77+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti2.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
78+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
79+
//
80+
svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 15 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti2.lane.zt.nxv8f16 call.
81+
return svluti2_lane_zt_f16(0, zn, 15);
82+
}
83+
84+
// CHECK-LABEL: @test_svluti2_lane_zt_bf16(
85+
// CHECK-NEXT: entry:
86+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti2.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
87+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
88+
//
89+
// CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t(
90+
// CPP-CHECK-NEXT: entry:
91+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti2.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
92+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
93+
//
94+
svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 15 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti2.lane.zt.nxv8bf16 call.
95+
return svluti2_lane_zt_bf16(0, zn, 15);
96+
}
97+
98+
// CHECK-LABEL: @test_svluti2_lane_zt_u32(
99+
// CHECK-NEXT: entry:
100+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
101+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
102+
//
103+
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t(
104+
// CPP-CHECK-NEXT: entry:
105+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
106+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
107+
//
108+
svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 15 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti2.lane.zt.nxv4i32 call.
109+
return svluti2_lane_zt_u32(0, zn, 15);
110+
}
111+
112+
// CHECK-LABEL: @test_svluti2_lane_zt_s32(
113+
// CHECK-NEXT: entry:
114+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
115+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
116+
//
117+
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t(
118+
// CPP-CHECK-NEXT: entry:
119+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
120+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
121+
//
122+
svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Signed 32-bit result variant; the CHECK lines above expect the same
// @llvm.aarch64.sme.luti2.lane.zt.nxv4i32 call as the unsigned test.
123+
return svluti2_lane_zt_s32(0, zn, 15);
124+
}
125+
126+
// CHECK-LABEL: @test_svluti2_lane_zt_f32(
127+
// CHECK-NEXT: entry:
128+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti2.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
129+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
130+
//
131+
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t(
132+
// CPP-CHECK-NEXT: entry:
133+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti2.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
134+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
135+
//
136+
svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 15 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti2.lane.zt.nxv4f32 call.
137+
return svluti2_lane_zt_f32(0, zn, 15);
138+
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
9+
#include <arm_sme_draft_spec_subject_to_change.h>
10+
11+
12+
// CHECK-LABEL: @test_svluti4_lane_zt_u8(
13+
// CHECK-NEXT: entry:
14+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
15+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
16+
//
17+
// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t(
18+
// CPP-CHECK-NEXT: entry:
19+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
20+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
21+
//
22+
svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 7 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti4.lane.zt.nxv16i8 call.
23+
return svluti4_lane_zt_u8(0, zn, 7);
24+
}
25+
26+
27+
// CHECK-LABEL: @test_svluti4_lane_zt_s8(
28+
// CHECK-NEXT: entry:
29+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
30+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
31+
//
32+
// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t(
33+
// CPP-CHECK-NEXT: entry:
34+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
35+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
36+
//
37+
svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Signed 8-bit result variant; the CHECK lines above expect the same
// @llvm.aarch64.sme.luti4.lane.zt.nxv16i8 call as the unsigned test.
38+
return svluti4_lane_zt_s8(0, zn, 7);
39+
}
40+
41+
// CHECK-LABEL: @test_svluti4_lane_zt_u16(
42+
// CHECK-NEXT: entry:
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
44+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
45+
//
46+
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t(
47+
// CPP-CHECK-NEXT: entry:
48+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
49+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
50+
//
51+
svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 7 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti4.lane.zt.nxv8i16 call.
52+
return svluti4_lane_zt_u16(0, zn, 7);
53+
}
54+
55+
// CHECK-LABEL: @test_svluti4_lane_zt_s16(
56+
// CHECK-NEXT: entry:
57+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
58+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
59+
//
60+
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t(
61+
// CPP-CHECK-NEXT: entry:
62+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
63+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
64+
//
65+
svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Signed 16-bit result variant; the CHECK lines above expect the same
// @llvm.aarch64.sme.luti4.lane.zt.nxv8i16 call as the unsigned test.
66+
return svluti4_lane_zt_s16(0, zn, 7);
67+
}
68+
69+
// CHECK-LABEL: @test_svluti4_lane_zt_f16(
70+
// CHECK-NEXT: entry:
71+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
72+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
73+
//
74+
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t(
75+
// CPP-CHECK-NEXT: entry:
76+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
77+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
78+
//
79+
svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 7 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti4.lane.zt.nxv8f16 call.
80+
return svluti4_lane_zt_f16(0, zn, 7);
81+
}
82+
83+
// CHECK-LABEL: @test_svluti4_lane_zt_bf16(
84+
// CHECK-NEXT: entry:
85+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
86+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
87+
//
88+
// CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t(
89+
// CPP-CHECK-NEXT: entry:
90+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
91+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
92+
//
93+
svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 7 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16 call.
94+
return svluti4_lane_zt_bf16(0, zn, 7);
95+
}
96+
97+
// CHECK-LABEL: @test_svluti4_lane_zt_u32(
98+
// CHECK-NEXT: entry:
99+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
100+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
101+
//
102+
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t(
103+
// CPP-CHECK-NEXT: entry:
104+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
105+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
106+
//
107+
svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 7 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti4.lane.zt.nxv4i32 call.
108+
return svluti4_lane_zt_u32(0, zn, 7);
109+
}
110+
111+
// CHECK-LABEL: @test_svluti4_lane_zt_s32(
112+
// CHECK-NEXT: entry:
113+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
114+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
115+
//
116+
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t(
117+
// CPP-CHECK-NEXT: entry:
118+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
119+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
120+
//
121+
svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Signed 32-bit result variant; the CHECK lines above expect the same
// @llvm.aarch64.sme.luti4.lane.zt.nxv4i32 call as the unsigned test.
122+
return svluti4_lane_zt_s32(0, zn, 7);
123+
}
124+
125+
// CHECK-LABEL: @test_svluti4_lane_zt_f32(
126+
// CHECK-NEXT: entry:
127+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
128+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
129+
//
130+
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t(
131+
// CPP-CHECK-NEXT: entry:
132+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
133+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
134+
//
135+
svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za {
// Passes 0 as the first immediate and 7 as the lane index; the CHECK lines
// above expect one @llvm.aarch64.sme.luti4.lane.zt.nxv4f32 call.
136+
return svluti4_lane_zt_f32(0, zn, 7);
137+
}

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,60 @@ void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_shared_za __arm
7878
svluti4_lane_zt_f32_x4(0, zn, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
7979
}
8080

81+
void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za {
// Immediate-range diagnostics for the single-register svluti2_lane_zt
// intrinsics (u8, u16, f16, bf16, u32, f32): each pair of calls first passes
// an out-of-range first argument (1, valid range [0, 0]) and then an
// out-of-range lane index (16, valid range [0, 15]).
82+
// Test Reg Offset
83+
svluti2_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
84+
// Test index value range
85+
svluti2_lane_zt_u8(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
86+
// Test Reg Offset
87+
svluti2_lane_zt_u16(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
88+
// Test index value range
89+
svluti2_lane_zt_u16(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
90+
// Test Reg Offset
91+
svluti2_lane_zt_f16(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
92+
// Test index value range
93+
svluti2_lane_zt_f16(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
94+
// Test Reg Offset
95+
svluti2_lane_zt_bf16(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
96+
// Test index value range
97+
svluti2_lane_zt_bf16(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
98+
// Test Reg Offset
99+
svluti2_lane_zt_u32(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
100+
// Test index value range
101+
svluti2_lane_zt_u32(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
102+
// Test Reg Offset
103+
svluti2_lane_zt_f32(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
104+
// Test index value range
105+
svluti2_lane_zt_f32(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
106+
}
107+
108+
void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za {
// Immediate-range diagnostics for the single-register svluti4_lane_zt
// intrinsics (u8, u16, f16, bf16, u32, f32): each pair of calls first passes
// an out-of-range first argument (1, valid range [0, 0]) and then an
// out-of-range lane index (8, valid range [0, 7]).
109+
// Test Reg Offset
110+
svluti4_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
111+
// Test index value range
112+
svluti4_lane_zt_u8(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
113+
// Test Reg Offset
114+
svluti4_lane_zt_u16(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
115+
// Test index value range
116+
svluti4_lane_zt_u16(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
117+
// Test Reg Offset
118+
svluti4_lane_zt_f16(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
119+
// Test index value range
120+
svluti4_lane_zt_f16(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
121+
// Test Reg Offset
122+
svluti4_lane_zt_bf16(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
123+
// Test index value range
124+
svluti4_lane_zt_bf16(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
125+
// Test Reg Offset
126+
svluti4_lane_zt_u32(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
127+
// Test index value range
128+
svluti4_lane_zt_u32(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
129+
// Test Reg Offset
130+
svluti4_lane_zt_f32(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
131+
// Test index value range
132+
svluti4_lane_zt_f32(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
133+
}
134+
81135
void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za {
82136
// Test Reg Offset
83137
svluti2_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}

0 commit comments

Comments
 (0)