@@ -3300,14 +3300,14 @@ let TargetPrefix = "aarch64" in {
3300
3300
: DefaultAttrsIntrinsic<[],
3301
3301
[llvm_i32_ty,
3302
3302
llvm_anyvector_ty, LLVMMatchType<0>],
3303
- [IntrWriteMem, IntrInaccessibleMemOnly]>;
3303
+ [IntrInaccessibleMemOnly]>;
3304
3304
3305
3305
class SME2_ZA_Write_VG4_Intrinsic
3306
3306
: DefaultAttrsIntrinsic<[],
3307
3307
[llvm_i32_ty,
3308
3308
llvm_anyvector_ty, LLVMMatchType<0>,
3309
3309
LLVMMatchType<0>, LLVMMatchType<0>],
3310
- [IntrWriteMem, IntrInaccessibleMemOnly]>;
3310
+ [IntrInaccessibleMemOnly]>;
3311
3311
3312
3312
class SVE2_VG2_Multi_Single_Intrinsic
3313
3313
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3720,18 +3720,48 @@ let TargetPrefix = "aarch64" in {
3720
3720
//
3721
3721
// Multi-Single add/sub
3722
3722
//
3723
- def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3724
- def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3725
- def int_aarch64_sme_add_write_single_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3726
- def int_aarch64_sme_sub_write_single_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3723
+
3724
+ class SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic
3725
+ : DefaultAttrsIntrinsic<[],
3726
+ [llvm_i32_ty,
3727
+ llvm_anyvector_ty, LLVMMatchType<0>,
3728
+ LLVMMatchType<0>],
3729
+ [IntrInaccessibleMemOnly, IntrWriteMem]>;
3730
+
3731
+ class SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic
3732
+ : DefaultAttrsIntrinsic<[],
3733
+ [llvm_i32_ty,
3734
+ llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
3735
+ LLVMMatchType<0>],
3736
+ [IntrInaccessibleMemOnly, IntrWriteMem]>;
3737
+
3738
+ def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
3739
+ def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
3740
+ def int_aarch64_sme_add_write_single_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic;
3741
+ def int_aarch64_sme_sub_write_single_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic;
3727
3742
3728
3743
//
3729
3744
// Multi-Multi add/sub
3730
3745
//
3731
- def int_aarch64_sme_add_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3732
- def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3733
- def int_aarch64_sme_add_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3734
- def int_aarch64_sme_sub_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3746
+ class SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic
3747
+ : DefaultAttrsIntrinsic<[],
3748
+ [llvm_i32_ty,
3749
+ llvm_anyvector_ty, LLVMMatchType<0>,
3750
+ LLVMMatchType<0>, LLVMMatchType<0>],
3751
+ [IntrInaccessibleMemOnly, IntrWriteMem]>;
3752
+
3753
+ class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic
3754
+ : DefaultAttrsIntrinsic<[],
3755
+ [llvm_i32_ty,
3756
+ llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
3757
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
3758
+ LLVMMatchType<0>, LLVMMatchType<0>],
3759
+ [IntrInaccessibleMemOnly, IntrWriteMem]>;
3760
+
3761
+ def int_aarch64_sme_add_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
3762
+ def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
3763
+ def int_aarch64_sme_add_write_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic;
3764
+ def int_aarch64_sme_sub_write_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic;
3735
3765
3736
3766
// Multi-vector clamps
3737
3767
def int_aarch64_sve_sclamp_single_x2 : SVE2_VG2_Multi_Single_Single_Intrinsic;
@@ -4027,12 +4057,12 @@ let TargetPrefix = "aarch64" in {
4027
4057
def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT;
4028
4058
def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane;
4029
4059
4030
- class SME2_FP8_CVT_X2_Single_Intrinsic
4060
+ class SVE2_FP8_CVT_X2_Single_Intrinsic
4031
4061
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
4032
4062
[llvm_nxv16i8_ty],
4033
4063
[IntrReadMem, IntrInaccessibleMemOnly]>;
4034
4064
4035
- class SME2_FP8_CVT_Single_X4_Intrinsic
4065
+ class SVE2_FP8_CVT_Single_X4_Intrinsic
4036
4066
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
4037
4067
[llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
4038
4068
[IntrReadMem, IntrInaccessibleMemOnly]>;
@@ -4096,14 +4126,14 @@ let TargetPrefix = "aarch64" in {
4096
4126
//
4097
4127
// CVT from FP8 to half-precision/BFloat16 multi-vector
4098
4128
//
4099
- def int_aarch64_sve_fp8_cvt1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4100
- def int_aarch64_sve_fp8_cvt2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4129
+ def int_aarch64_sve_fp8_cvt1_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ;
4130
+ def int_aarch64_sve_fp8_cvt2_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ;
4101
4131
4102
4132
//
4103
4133
// CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector
4104
4134
//
4105
- def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4106
- def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4135
+ def int_aarch64_sve_fp8_cvtl1_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ;
4136
+ def int_aarch64_sve_fp8_cvtl2_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ;
4107
4137
4108
4138
//
4109
4139
// CVT to FP8 from half-precision/BFloat16/single-precision multi-vector
@@ -4113,8 +4143,8 @@ let TargetPrefix = "aarch64" in {
4113
4143
[llvm_anyvector_ty, LLVMMatchType<0>],
4114
4144
[IntrReadMem, IntrInaccessibleMemOnly]>;
4115
4145
4116
- def int_aarch64_sve_fp8_cvt_x4 : SME2_FP8_CVT_Single_X4_Intrinsic ;
4117
- def int_aarch64_sve_fp8_cvtn_x4 : SME2_FP8_CVT_Single_X4_Intrinsic ;
4146
+ def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic ;
4147
+ def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic ;
4118
4148
4119
4149
// FP8 outer product
4120
4150
def int_aarch64_sme_fp8_fmopa_za16 : SME_FP8_OuterProduct_Intrinsic;
0 commit comments