@@ -3257,14 +3257,14 @@ let TargetPrefix = "aarch64" in {
3257
3257
: DefaultAttrsIntrinsic<[],
3258
3258
[llvm_i32_ty,
3259
3259
llvm_anyvector_ty, LLVMMatchType<0>],
3260
- [IntrWriteMem, IntrInaccessibleMemOnly]>;
3260
+ [IntrInaccessibleMemOnly]>;
3261
3261
3262
3262
class SME2_ZA_Write_VG4_Intrinsic
3263
3263
: DefaultAttrsIntrinsic<[],
3264
3264
[llvm_i32_ty,
3265
3265
llvm_anyvector_ty, LLVMMatchType<0>,
3266
3266
LLVMMatchType<0>, LLVMMatchType<0>],
3267
- [IntrWriteMem, IntrInaccessibleMemOnly]>;
3267
+ [IntrInaccessibleMemOnly]>; // IntrWriteMem is dropped by this change. NOTE(review): presumably because users of this class also read ZA - confirm.
3268
3268
3269
3269
class SVE2_VG2_Multi_Single_Intrinsic
3270
3270
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3677,18 +3677,48 @@ let TargetPrefix = "aarch64" in {
3677
3677
//
3678
3678
// Multi-Single add/sub
3679
3679
//
3680
- def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3681
- def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3682
- def int_aarch64_sme_add_write_single_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3683
- def int_aarch64_sme_sub_write_single_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3680
+
3681
+ class SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic // No results; operands are one i32 followed by three vectors of a single overloaded type.
3682
+ : DefaultAttrsIntrinsic<[],
3683
+ [llvm_i32_ty, // NOTE(review): presumably the ZA slice index - confirm.
3684
+ llvm_anyvector_ty, LLVMMatchType<0>, // NOTE(review): presumably the 2-vector "multi" operand - confirm.
3685
+ LLVMMatchType<0>], // NOTE(review): presumably the "single" vector operand - confirm.
3686
+ [IntrInaccessibleMemOnly, IntrWriteMem]>; // Unlike the SME2_ZA_Write_* classes in this change, IntrWriteMem is kept here.
3687
+
3688
+ class SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic // No results; operands are one i32 followed by five vectors of a single overloaded type.
3689
+ : DefaultAttrsIntrinsic<[],
3690
+ [llvm_i32_ty, // NOTE(review): presumably the ZA slice index - confirm.
3691
+ llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, // NOTE(review): presumably the 4-vector "multi" operand - confirm.
3692
+ LLVMMatchType<0>], // NOTE(review): presumably the "single" vector operand - confirm.
3693
+ [IntrInaccessibleMemOnly, IntrWriteMem]>; // Same attribute set as the VG2 multi-single class.
3694
+
3695
+ def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic; // The vg1x2 add/sub pair uses the VG2 multi-single class.
3696
+ def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
3697
+ def int_aarch64_sme_add_write_single_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic; // The vg1x4 add/sub pair uses the VG4 multi-single class.
3698
+ def int_aarch64_sme_sub_write_single_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic;
3684
3699
3685
3700
//
3686
3701
// Multi-Multi add/sub
3687
3702
//
3688
- def int_aarch64_sme_add_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3689
- def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3690
- def int_aarch64_sme_add_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3691
- def int_aarch64_sme_sub_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3703
+ class SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic // No results; operands are one i32 followed by four vectors of a single overloaded type.
3704
+ : DefaultAttrsIntrinsic<[],
3705
+ [llvm_i32_ty, // NOTE(review): presumably the ZA slice index - confirm.
3706
+ llvm_anyvector_ty, LLVMMatchType<0>, // NOTE(review): presumably the first 2-vector group - confirm.
3707
+ LLVMMatchType<0>, LLVMMatchType<0>], // NOTE(review): presumably the second 2-vector group - confirm.
3708
+ [IntrInaccessibleMemOnly, IntrWriteMem]>; // Same attribute set as the multi-single add/sub write classes.
3709
+
3710
+ class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic // No results; operands are one i32 followed by eight vectors of a single overloaded type.
3711
+ : DefaultAttrsIntrinsic<[],
3712
+ [llvm_i32_ty, // NOTE(review): presumably the ZA slice index - confirm.
3713
+ llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, // Vectors 1-3 of 8.
3714
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, // Vectors 4-6 of 8. NOTE(review): presumably two 4-vector groups split 3/3/2 only for line width - confirm.
3715
+ LLVMMatchType<0>, LLVMMatchType<0>], // Vectors 7-8 of 8.
3716
+ [IntrInaccessibleMemOnly, IntrWriteMem]>; // Same attribute set as the VG2 multi-multi class.
3717
+
3718
+ def int_aarch64_sme_add_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic; // The vg1x2 add/sub pair uses the VG2 multi-multi class.
3719
+ def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
3720
+ def int_aarch64_sme_add_write_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic; // The vg1x4 add/sub pair uses the VG4 multi-multi class.
3721
+ def int_aarch64_sme_sub_write_za_vg1x4 : SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic;
3692
3722
3693
3723
// Multi-vector clamps
3694
3724
def int_aarch64_sve_sclamp_single_x2 : SVE2_VG2_Multi_Single_Single_Intrinsic;
@@ -3984,12 +4014,12 @@ let TargetPrefix = "aarch64" in {
3984
4014
def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT;
3985
4015
def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane;
3986
4016
3987
- class SME2_FP8_CVT_X2_Single_Intrinsic
4017
+ class SVE2_FP8_CVT_X2_Single_Intrinsic // Prefix renamed from SME2_ to SVE2_; two results of one overloaded vector type, one nxv16i8 operand.
3988
4018
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3989
4019
[llvm_nxv16i8_ty],
3990
4020
[IntrReadMem, IntrInaccessibleMemOnly]>;
3991
4021
3992
- class SME2_FP8_CVT_Single_X4_Intrinsic
4022
+ class SVE2_FP8_CVT_Single_X4_Intrinsic // Prefix renamed from SME2_ to SVE2_; one nxv16i8 result, four nxv4f32 operands.
3993
4023
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
3994
4024
[llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
3995
4025
[IntrReadMem, IntrInaccessibleMemOnly]>;
@@ -4053,14 +4083,14 @@ let TargetPrefix = "aarch64" in {
4053
4083
//
4054
4084
// CVT from FP8 to half-precision/BFloat16 multi-vector
4055
4085
//
4056
- def int_aarch64_sve_fp8_cvt1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4057
- def int_aarch64_sve_fp8_cvt2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4086
+ def int_aarch64_sve_fp8_cvt1_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ; // Only the class name changes (SME2_ -> SVE2_); the intrinsic name is unchanged.
4087
+ def int_aarch64_sve_fp8_cvt2_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ;
4058
4088
4059
4089
//
4060
4090
// CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector
4061
4091
//
4062
- def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4063
- def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic ;
4092
+ def int_aarch64_sve_fp8_cvtl1_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ; // Only the class name changes (SME2_ -> SVE2_); the intrinsic name is unchanged.
4093
+ def int_aarch64_sve_fp8_cvtl2_x2 : SVE2_FP8_CVT_X2_Single_Intrinsic ;
4064
4094
4065
4095
//
4066
4096
// CVT to FP8 from half-precision/BFloat16/single-precision multi-vector
@@ -4070,8 +4100,8 @@ let TargetPrefix = "aarch64" in {
4070
4100
[llvm_anyvector_ty, LLVMMatchType<0>],
4071
4101
[IntrReadMem, IntrInaccessibleMemOnly]>;
4072
4102
4073
- def int_aarch64_sve_fp8_cvt_x4 : SME2_FP8_CVT_Single_X4_Intrinsic ;
4074
- def int_aarch64_sve_fp8_cvtn_x4 : SME2_FP8_CVT_Single_X4_Intrinsic ;
4103
+ def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic ; // Only the class name changes (SME2_ -> SVE2_); the intrinsic name is unchanged.
4104
+ def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic ;
4075
4105
4076
4106
// FP8 outer product
4077
4107
def int_aarch64_sme_fp8_fmopa_za16 : SME_FP8_OuterProduct_Intrinsic;
0 commit comments