Skip to content

Commit e92ff64

Browse files
[Clang][LLVM] Implement single-multi vectors MOP4{A/S} (#128854)
Implement all single-multi {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on #127797 This patch updates the semantics of template arguments in intrinsic names for clarity and ease of use. Previously, template argument numbers indicated which character in the prototype string determined the final type suffix, which was confusing—especially for intrinsics using multiple prototype modifiers per operand (e.g., intrinsics operating on arrays of vectors). The number had to reference the correct character in the prototype (e.g., the ‘u’ in “2.u”), making the system cumbersome and error-prone. With this patch, template argument numbers now refer to the operand number that determines the final type suffix, providing a more intuitive and consistent approach.
1 parent c1efd8b commit e92ff64

File tree

8 files changed

+1099
-23
lines changed

8 files changed

+1099
-23
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ defm SVMOPS : ZAFPOuterProd<"mops">;
294294

295295
multiclass MOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
296296
def _1x1 : Inst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>;
297+
def _1x2 : Inst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{d}]", "vid2", t, MergeNone, i # "_1x2", [IsInOutZA, IsStreaming], checks>;
297298
}
298299

299300
let SMETargetGuard = "sme2,sme-mop4" in {
@@ -345,13 +346,21 @@ multiclass SUMOP4<string mode, string za, string t, string i, list<ImmCheck> che
345346
"vidu", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x1",
346347
[IsStreaming, IsInOutZA],
347348
checks>;
349+
def _1x2 : SInst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{3}]",
350+
"vid2.u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x2",
351+
[IsStreaming, IsInOutZA],
352+
checks>;
348353
}
349354

350355
multiclass USMOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
351356
def _1x1 : SInst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{3}]",
352357
"vidx", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x1",
353358
[IsStreaming, IsInOutZA],
354359
checks>;
360+
def _1x2 : SInst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{3}]",
361+
"vid2.x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x2",
362+
[IsStreaming, IsInOutZA],
363+
checks>;
355364
}
356365

357366
let SMETargetGuard = "sme2,sme-mop4" in {

clang/include/clang/Basic/arm_sve.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2420,8 +2420,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
24202420
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
24212421
def SVSUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "sil", MergeNone, "aarch64_sve_sunpk_x2", [IsStreaming], []>;
24222422
def SVUUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x2", [IsStreaming], []>;
2423-
def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>;
2424-
def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
2423+
def SVSUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>;
2424+
def SVUUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
24252425
}
24262426

24272427
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {

clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c

Lines changed: 466 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp

Lines changed: 94 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,27 @@
66

77
#include <arm_sme.h>
88

9-
void tests_mop4_imm_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
9+
void tests_mop4_imm_s8_s8_1x1(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
1010
svmop4a_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
1111
svmop4s_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
1212
return;
1313
}
1414

15-
void tests_mop4_imm_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
15+
void tests_mop4_imm_u8_u8_1x1(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
1616
svmop4a_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
1717
svmop4s_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
1818
return;
1919
}
2020

21-
void tests_mop4_imm_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
21+
void tests_mop4_imm_s8_u8_1x1(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
2222
svmop4a_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
2323
svmop4s_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
2424
svmop4a_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
2525
svmop4s_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
2626
return;
2727
}
2828

29-
void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
29+
void tests_mop4_imm_s16_s16_1x1(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
3030
svmop4a_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
3131
svmop4s_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
3232

@@ -35,7 +35,7 @@ void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_in
3535
return;
3636
}
3737

38-
void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
38+
void tests_mop4_imm_u16_u16_1x1(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
3939
svmop4a_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
4040
svmop4s_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
4141

@@ -44,15 +44,15 @@ void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_
4444
return;
4545
}
4646

47-
void tests_mop4_imm_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
47+
void tests_mop4_imm_s16_u16_1x1(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
4848
svmop4a_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
4949
svmop4s_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
5050
svmop4a_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
5151
svmop4s_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
5252
return;
5353
}
5454

55-
void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
55+
void tests_mop4_imm_f16_f16_1x1(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
5656
svmop4a_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
5757
svmop4s_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
5858

@@ -61,7 +61,7 @@ void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __ar
6161
return;
6262
}
6363

64-
void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
64+
void tests_mop4_imm_bf16_bf16_1x1(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
6565
svmop4a_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
6666
svmop4s_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
6767

@@ -71,14 +71,98 @@ void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming
7171

7272
}
7373

74-
void tests_mop4_imm_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
74+
void tests_mop4_imm_f32_f32_1x1(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
7575
svmop4a_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
7676
svmop4s_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
7777
return;
7878
}
7979

80-
void tests_mop4_imm_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
80+
void tests_mop4_imm_f64_f64_1x1(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
8181
svmop4a_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
8282
svmop4s_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
8383
return;
8484
}
85+
86+
void tests_mop4_imm_s8_s8_1x2(svint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") {
87+
svmop4a_1x2_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
88+
svmop4s_1x2_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
89+
return;
90+
}
91+
92+
void tests_mop4_imm_u8_u8_1x2(svuint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") {
93+
svmop4a_1x2_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
94+
svmop4s_1x2_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
95+
return;
96+
}
97+
98+
void tests_mop4_imm_s8_u8_1x2(svint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") {
99+
svmop4a_1x2_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
100+
svmop4s_1x2_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
101+
return;
102+
}
103+
104+
void tests_mop4_imm_u8_s8_1x2(svuint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") {
105+
svmop4a_1x2_za32_u8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
106+
svmop4s_1x2_za32_u8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
107+
return;
108+
}
109+
110+
void tests_mop4_imm_s16_s16_1x2(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") {
111+
svmop4a_1x2_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
112+
svmop4s_1x2_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
113+
114+
svmop4a_1x2_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
115+
svmop4s_1x2_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
116+
return;
117+
}
118+
119+
void tests_mop4_imm_u16_u16_1x2(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") {
120+
svmop4a_1x2_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
121+
svmop4s_1x2_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
122+
123+
svmop4a_1x2_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
124+
svmop4s_1x2_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
125+
return;
126+
}
127+
128+
void tests_mop4_imm_s16_u16_1x2(svint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") {
129+
svmop4a_1x2_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
130+
svmop4s_1x2_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
131+
return;
132+
}
133+
134+
void tests_mop4_imm_u16_s16_1x2(svuint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") {
135+
svmop4a_1x2_za64_u16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
136+
svmop4s_1x2_za64_u16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
137+
return;
138+
}
139+
140+
void tests_mop4_imm_f16_f16_1x2(svfloat16_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") {
141+
svmop4a_1x2_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
142+
svmop4s_1x2_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
143+
144+
svmop4a_1x2_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
145+
svmop4s_1x2_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
146+
return;
147+
}
148+
149+
void tests_mop4_imm_bf16_bf16_1x2(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") {
150+
svmop4a_1x2_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
151+
svmop4s_1x2_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
152+
153+
svmop4a_1x2_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
154+
svmop4s_1x2_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
155+
return;
156+
}
157+
158+
void tests_mop4_imm_f32_f32_1x2(svfloat32_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") {
159+
svmop4a_1x2_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
160+
svmop4s_1x2_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
161+
return;
162+
}
163+
164+
void tests_mop4_imm_f64_f64_1x2(svfloat64_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") {
165+
svmop4a_1x2_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
166+
svmop4s_1x2_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
167+
return;
168+
}

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,10 @@ std::string Intrinsic::replaceTemplatedArgs(std::string Name, TypeSpec TS,
10431043
case '1':
10441044
case '2':
10451045
case '3':
1046-
T = SVEType(TS, Proto[C - '0']);
1046+
// Extract the modifier before passing to SVEType to handle numeric
1047+
// modifiers
1048+
auto [Mod, NumVectors] = getProtoModifier(Proto, (C - '0'));
1049+
T = SVEType(TS, Mod);
10471050
break;
10481051
}
10491052

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3070,11 +3070,19 @@ let TargetPrefix = "aarch64" in {
30703070
llvm_anyvector_ty,
30713071
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
30723072

3073+
class SME_OuterProduct_QuarterTile_Single_Multi
3074+
: DefaultAttrsIntrinsic<[],
3075+
[llvm_i32_ty,
3076+
llvm_anyvector_ty,
3077+
LLVMMatchType<0>,
3078+
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
3079+
30733080
// 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
30743081
foreach mode = ["s", "a"] in {
30753082
foreach za = ["", "_za64"] in {
30763083
foreach ty = ["s", "u", "su", "us"] in {
30773084
def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
3085+
def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
30783086
}
30793087
}
30803088
}
@@ -3083,9 +3091,10 @@ let TargetPrefix = "aarch64" in {
30833091
foreach mode = ["s", "a"] in {
30843092
foreach wide = ["", "_wide"] in {
30853093
def int_aarch64_sme_mop4 # mode # wide # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
3094+
def int_aarch64_sme_mop4 # mode # wide # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
30863095
}
30873096
}
3088-
3097+
30893098
class SME_AddVectorToTile_Intrinsic
30903099
: DefaultAttrsIntrinsic<[],
30913100
[llvm_i32_ty,

0 commit comments

Comments
 (0)