Skip to content

[Clang][LLVM] Implement single-multi vectors MOP4{A/S} #128854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ defm SVMOPS : ZAFPOuterProd<"mops">;

multiclass MOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
def _1x1 : Inst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{d}]", "vidd", t, MergeNone, i # "_1x1", [IsInOutZA, IsStreaming], checks>;
def _1x2 : Inst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{d}]", "vid2", t, MergeNone, i # "_1x2", [IsInOutZA, IsStreaming], checks>;
}

let SMETargetGuard = "sme2,sme-mop4" in {
Expand Down Expand Up @@ -345,13 +346,21 @@ multiclass SUMOP4<string mode, string za, string t, string i, list<ImmCheck> che
"vidu", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x1",
[IsStreaming, IsInOutZA],
checks>;
def _1x2 : SInst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{3}]",
"vid2.u", t, MergeNone, "aarch64_sme_sumop4" # mode # i # "_wide_1x2",
[IsStreaming, IsInOutZA],
checks>;
}

multiclass USMOP4<string mode, string za, string t, string i, list<ImmCheck> checks> {
def _1x1 : SInst<"svmop4" # mode # "[_1x1]" # za # "[_{d}_{3}]",
"vidx", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x1",
[IsStreaming, IsInOutZA],
checks>;
def _1x2 : SInst<"svmop4" # mode # "[_1x2]" # za # "[_{d}_{3}]",
"vid2.x", t, MergeNone, "aarch64_sme_usmop4" # mode # i # "_wide_1x2",
[IsStreaming, IsInOutZA],
checks>;
}

let SMETargetGuard = "sme2,sme-mop4" in {
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -2420,8 +2420,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
def SVSUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "sil", MergeNone, "aarch64_sve_sunpk_x2", [IsStreaming], []>;
def SVUUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x2", [IsStreaming], []>;
def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>;
def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
def SVSUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "sil", MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>;
def SVUUNPK_X4 : SInst<"svunpk_{d}[_{1}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
}

let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
Expand Down
466 changes: 466 additions & 0 deletions clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c

Large diffs are not rendered by default.

104 changes: 94 additions & 10 deletions clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,27 @@

#include <arm_sme.h>

void tests_mop4_imm_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_s8_s8_1x1(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_u8_u8_1x1(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_s8_u8(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_s8_u8_1x1(svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4a_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_u8_s8(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_s16_s16_1x1(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

Expand All @@ -35,7 +35,7 @@ void tests_mop4_imm_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_in
return;
}

void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_u16_u16_1x1(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

Expand All @@ -44,15 +44,15 @@ void tests_mop4_imm_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_
return;
}

void tests_mop4_imm_s16_u16(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_s16_u16_1x1(svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x1_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4a_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x1_za64_u16_s16(-1, zm, zn); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}

void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_f16_f16_1x1(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

Expand All @@ -61,7 +61,7 @@ void tests_mop4_imm_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __ar
return;
}

void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_bf16_bf16_1x1(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

Expand All @@ -71,14 +71,98 @@ void tests_mop4_imm_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming

}

void tests_mop4_imm_f32_f32(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_f32_f32_1x1(svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x1_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_f64_f64(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
void tests_mop4_imm_f64_f64_1x1(svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x1_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}

void tests_mop4_imm_s8_s8_1x2(svint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_s8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_u8_u8_1x2(svuint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_u8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_s8_u8_1x2(svint8_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_s8_u8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_u8_s8_1x2(svuint8_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_u8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_u8_s8(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_s16_s16_1x2(svint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

svmop4a_1x2_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x2_za64_s16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}

void tests_mop4_imm_u16_u16_1x2(svuint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

svmop4a_1x2_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x2_za64_u16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}

void tests_mop4_imm_s16_u16_1x2(svint16_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x2_za64_s16_u16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}

void tests_mop4_imm_u16_s16_1x2(svuint16_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za64_u16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x2_za64_u16_s16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}

void tests_mop4_imm_f16_f16_1x2(svfloat16_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

svmop4a_1x2_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
svmop4s_1x2_za16_f16_f16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
return;
}

void tests_mop4_imm_bf16_bf16_1x2(svbfloat16_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}

svmop4a_1x2_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
svmop4s_1x2_za16_bf16_bf16(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
return;
}

void tests_mop4_imm_f32_f32_1x2(svfloat32_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svmop4s_1x2_za32_f32_f32(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
return;
}

void tests_mop4_imm_f64_f64_1x2(svfloat64_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") {
svmop4a_1x2_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svmop4s_1x2_za64_f64_f64(-1, zn, zm); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
return;
}
5 changes: 4 additions & 1 deletion clang/utils/TableGen/SveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,10 @@ std::string Intrinsic::replaceTemplatedArgs(std::string Name, TypeSpec TS,
case '1':
case '2':
case '3':
T = SVEType(TS, Proto[C - '0']);
// Extract the modifier before passing to SVEType to handle numeric
// modifiers
auto [Mod, NumVectors] = getProtoModifier(Proto, (C - '0'));
T = SVEType(TS, Mod);
break;
}

Expand Down
11 changes: 10 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -3070,11 +3070,19 @@ let TargetPrefix = "aarch64" in {
llvm_anyvector_ty,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;

class SME_OuterProduct_QuarterTile_Single_Multi
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty,
LLVMMatchType<0>,
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;

// 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
foreach mode = ["s", "a"] in {
foreach za = ["", "_za64"] in {
foreach ty = ["s", "u", "su", "us"] in {
def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
def int_aarch64_sme_ # ty # "mop4" # mode # za # "_wide_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
}
}
}
Expand All @@ -3083,9 +3091,10 @@ let TargetPrefix = "aarch64" in {
foreach mode = ["s", "a"] in {
foreach wide = ["", "_wide"] in {
def int_aarch64_sme_mop4 # mode # wide # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
def int_aarch64_sme_mop4 # mode # wide # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
}
}

class SME_AddVectorToTile_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
Expand Down
Loading
Loading