
[AArch64] Model ZA array using inaccessible memory #132058


Merged: 5 commits, Apr 14, 2025

Conversation

Lukacma
Contributor

@Lukacma Lukacma commented Mar 19, 2025

This patch changes how the ZA array is modelled at the LLVM-IR level. Currently, accesses to ZA are represented at the LLVM-IR level as generic memory reads and writes, and at the instruction level as unmodeled side effects. This patch instead models them as purely inaccessible memory accesses, with no unmodeled side effects.
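For context, these TableGen intrinsic properties surface as IR memory attributes on the intrinsic declarations. The sketch below is illustrative only: the attribute groups are hand-written to show the intent, and the mangled intrinsic name is an assumption rather than copied from generated output.

```llvm
; Illustrative only: how the new properties are expected to lower to IR
; memory attributes (attribute spellings per the current LangRef).

; IntrReadMem + IntrInaccessibleMemOnly: a ZA read such as
; int_aarch64_sme_read_horiz only reads state that IR cannot address.
declare <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(
    <vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32) #0

; IntrWriteMem + IntrInaccessibleMemOnly: int_aarch64_sme_zero only
; writes that state, so it no longer needs to be treated as having
; arbitrary side effects that block surrounding optimisations.
declare void @llvm.aarch64.sme.zero(i32) #1

attributes #0 = { nounwind willreturn memory(inaccessiblemem: read) }
attributes #1 = { nounwind willreturn memory(inaccessiblemem: write) }
```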

@llvmbot
Member

llvmbot commented Mar 19, 2025

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes



Patch is 87.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132058.diff

4 Files Affected:

  • (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+40-40)
  • (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+37-2)
  • (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll (+153-106)
  • (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll (+253-109)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6dfc3c8f2a393..7648fc55d54ae 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2940,7 +2940,7 @@ def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic;
 let TargetPrefix = "aarch64" in {
   class SME_Load_Store_Intrinsic<LLVMType pred_ty>
     : DefaultAttrsIntrinsic<[],
-        [pred_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
+        [pred_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<2>>]>;
 
   // Loads
   def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
@@ -2968,18 +2968,18 @@ let TargetPrefix = "aarch64" in {
 
   // Spill + fill
   class SME_LDR_STR_ZA_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly]>;
   def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic;
   def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic;
 
   class SME_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
           [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
+           llvm_i32_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>;
   class SME_VectorToTile_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_read_horiz  : SME_TileToVector_Intrinsic;
   def int_aarch64_sme_read_vert   : SME_TileToVector_Intrinsic;
@@ -2994,13 +2994,13 @@ let TargetPrefix = "aarch64" in {
   class SME_MOVAZ_TileToVector_X2_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   class SME_MOVAZ_TileToVector_X4_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
            LLVMMatchType<0>,LLVMMatchType<0>],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
   def int_aarch64_sme_readz_vert_x2  : SME_MOVAZ_TileToVector_X2_Intrinsic;
@@ -3011,7 +3011,7 @@ let TargetPrefix = "aarch64" in {
   class SME_MOVAZ_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
+          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic;
   def int_aarch64_sme_readz_vert  : SME_MOVAZ_TileToVector_Intrinsic;
@@ -3022,12 +3022,12 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_readz_x2
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
           [llvm_i32_ty],
-          [IntrNoMem, IntrHasSideEffects]>;
+          [IntrInaccessibleMemOnly]>;
 
   def int_aarch64_sme_readz_x4
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
           [llvm_i32_ty],
-          [IntrNoMem, IntrHasSideEffects]>;
+          [IntrInaccessibleMemOnly]>;
 
   def int_aarch64_sme_write_lane_zt
        :  DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty],
@@ -3038,7 +3038,7 @@ let TargetPrefix = "aarch64" in {
             [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
 
 
-  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
   def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
 
   class SME_OuterProduct_Intrinsic
@@ -3047,7 +3047,7 @@ let TargetPrefix = "aarch64" in {
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMMatchType<0>,
-           llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
   def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
@@ -3069,7 +3069,7 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
   def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
@@ -3189,56 +3189,56 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>],
-                []>;
+                [IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>],
-                []>;
+                [IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>],
-                []>;
+                [IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>],
-                []>;
+                [IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-                []>;
+                [IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_ArrayVector_Single_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                 llvm_anyvector_ty,
                 LLVMMatchType<0>, llvm_i32_ty],
-                [ImmArg<ArgIndex<3>>]>;
+                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, llvm_i32_ty],
-                [ImmArg<ArgIndex<4>>]>;
+                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>, llvm_i32_ty],
-                [ImmArg<ArgIndex<6>>]>;
+                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
 
   class SME2_VG2_Multi_Imm_Intrinsic
     : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
@@ -3257,14 +3257,14 @@ let TargetPrefix = "aarch64" in {
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               []>;
+               [IntrWriteMem, IntrInaccessibleMemOnly]>;
 
   class SME2_ZA_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               []>;
+               [IntrWriteMem, IntrInaccessibleMemOnly]>;
 
   class SME2_VG2_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3353,50 +3353,50 @@ let TargetPrefix = "aarch64" in {
   class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty],
-                []>;
+                [IntrReadMem, IntrInaccessibleMemOnly]>;
 
   class SME2_ZA_ArrayVector_Read_VG4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                              LLVMMatchType<0>,  LLVMMatchType<0>],
                 [llvm_i32_ty],
-                []>;
+                [IntrReadMem, IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_TileVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty, llvm_i32_ty],
-                []>;
+                [IntrReadMem, IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_TileVector_Read_VG4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                              LLVMMatchType<0>,  LLVMMatchType<0>],
                 [llvm_i32_ty, llvm_i32_ty],
-                []>;
+                [IntrReadMem, IntrInaccessibleMemOnly]>;
 
   class SME2_ZA_ArrayVector_Write_VG2_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               []>;
+               [IntrWriteMem, IntrInaccessibleMemOnly]>;
 
   class SME2_ZA_ArrayVector_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               []>;
+               [IntrWriteMem, IntrInaccessibleMemOnly]>;
 
   class SME2_Matrix_TileVector_Write_VG2_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [ImmArg<ArgIndex<0>>]>;
+               [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   class SME2_Matrix_TileVector_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [ImmArg<ArgIndex<0>>]>;
+               [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
 
   class SME2_VG2_Multi_Single_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3562,7 +3562,7 @@ let TargetPrefix = "aarch64" in {
   // Multi-vector zeroing
 
   foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in {
-    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrNoMem, IntrHasSideEffects]>;
+    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrWriteMem, IntrInaccessibleMemOnly]>;
   }
   
   // Multi-vector signed saturating doubling multiply high
@@ -4002,57 +4002,57 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
           llvm_nxv16i1_ty, llvm_nxv16i1_ty,
           llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-          [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+          [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>;
 
   class SME_FP8_ZA_LANE_VGx1_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                llvm_nxv16i8_ty,
                                llvm_nxv16i8_ty,
                                llvm_i32_ty],
-                          [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<3>>]>;
+                          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
 
   class SME_FP8_ZA_LANE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<4>>]>;
+                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
 
   class SME_FP8_ZA_LANE_VGx4_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<6>>]>;
+                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
   class SME_FP8_ZA_SINGLE_VGx1_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+                            [IntrInaccessibleMemOnly]>;
 
   class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+                            [IntrInaccessibleMemOnly]>;
 
   class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                              [IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+                              [IntrInaccessibleMemOnly]>;
 
   class SME_FP8_ZA_MULTI_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+                            [IntrInaccessibleMemOnly]>;
 
   class SME_FP8_ZA_MULTI_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+                            [IntrInaccessibleMemOnly]>;
   //
   // CVT from FP8 to half-precision/BFloat16 multi-vector
   //
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 4f6a413ba5e5c..ff850751acf48 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -102,6 +102,8 @@ class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
   // Translated to the actual instructions in AArch64ISelLowering.cpp
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
@@ -110,6 +112,8 @@ class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty, Regis
       Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
@@ -118,6 +122,8 @@ class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty, Regist
       Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
@@ -126,6 +132,8 @@ class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, Regist
       Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 class sme2_move_to_za_pseudo<string name, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
@@ -133,6 +141,7 @@ class sme2_move_to_za_pseudo<string name, Operand imm_ty, RegisterOperand multi_
       Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayStore = 1;
 }
 
 class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
@@ -140,6 +149,7 @@ class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, Re
       Pseudo<(outs), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayStore = 1;
 }
 
 class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum za_flag>
@@ -147,6 +157,7 @@ class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum
       Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayStore = 1;
 }
 
 class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
@@ -154,6 +165,8 @@ class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, R
       Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
@@ -162,6 +175,8 @@ class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOpe
       Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -665,6 +680,8 @@ class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
   // Translated to the actual instructions in AArch64ISelLowering.cpp
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
 }
 
 multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic, SDPatternOperator op> {
@@ -1123,6 +1140,7 @@ class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
   // Translated to the actual instructions in AArch64ISelLowering.cpp
   let SMEMatrixType = za_flag;
   let usesCustomInserter = 1;
+  let mayStore = 1;
 }
 
 multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
@@ -1317,6 +1335,7 @@ multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
                                    is_col, sme_elm_idx0_15, mnemonic> {
     bits<4> imm;
     let Inst{8-5} = imm;
+    let mayLoad = 1;
   }
   def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
                                                                  TileVectorOpH16),
@@ -1325,6 +1344,7 @@ multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
     bits<3> imm;
     let Inst{8}   = ZAn;
     let Inst{7-5} = imm;
+    let mayLoad = 1;
   }
   def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
                                                                  TileVectorOpH32),
@@ -1333,6 +1353,7 @@ multiclass sme_tile...
[truncated]

<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>
)

declare void @dummy_use_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
Contributor

Would it be possible to add a single function that takes a variable number of arguments instead? e.g.

Suggested change
declare void @dummy_use_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare void @use(...)

Collaborator

+1 for this or alternatively don't have tests that call multiple intrinsics for this very reason.

Contributor Author

Thanks! I didn't know LLVM IR had this functionality.

Comment on lines 3243 to 3287
class SME2_VG2_Multi_Imm_Intrinsic
: DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
Collaborator

It is worth renaming instances like this so that SME is only used for intrinsics that relate to ZA or ZT. Doing this makes it easier to spot potentially missing InaccessibleMem flags.

Contributor Author

That makes sense, but maybe that should be a separate patch?

Collaborator

I guess you could land an NFC change first and then update this PR on top of it, but it's all in the same file, so I'm happy either way. What I care most about is that, when reviewing this PR, it can be seen that all the SME classes have some combination of InaccessibleMem flags.

Contributor Author

I have updated the names of the classes to SVE when they are not accessing the ZA array. I have left the names of the intrinsics intact, though, so as to avoid rewriting all the tests as well. If you would like me to change those too, let me know.

Collaborator

Many thanks. No need to rename any intrinsics for this PR; the renamed classes are enough to better verify the changes.

From what I can see the only misnamed intrinsics are famin and famax?

Contributor Author

Also sme_fp8_scale ones.

Comment on lines +105 to +106
let mayLoad = 1;
let mayStore = 1;
Collaborator

To aid review, would I be correct in saying you had no choice in adding these, and thus all such changes are the result of getting LLVM to build after making the changes to IntrinsicsAArch64.td?

Contributor Author

Yes. Instructions need to define the same memory behaviour as the intrinsics; the patterns fail otherwise.


Collaborator

@paulwalker-arm paulwalker-arm left a comment

Please can you also rename the following classes to be prefixed by SVE2:
SME2_FP8_CVT_X2_Single_Intrinsic
SME2_FP8_CVT_Single_X4_Intrinsic

@@ -3257,36 +3257,36 @@ let TargetPrefix = "aarch64" in {
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty, LLVMMatchType<0>],
[]>;
[IntrWriteMem, IntrInaccessibleMemOnly]>;
Collaborator

This class represents add/sub accumulation instructions, and so it also reads ZA?

Related to this, it looks like the following classes are used by the non-accumulation variants of add/sub and are thus over-specified, because those variants only write to ZA:

SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic
SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic
SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic
SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic

Given it's not strictly speaking a bug, just a missed optimisation, I'm happy to let them slide for this PR if that's your preference but we should follow up and break them out.
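In IR attribute terms, the distinction being drawn here is readwrite versus write-only inaccessible memory. The declarations below are hypothetical, named purely for illustration, and only sketch the over-specification point:

```llvm
; Hypothetical declarations, named for illustration only.

; Accumulating variant: reads the old ZA slice and writes the sum back,
; so it must keep memory(inaccessiblemem: readwrite).
declare void @za.add.accumulate(i32, <vscale x 4 x i32>) #0

; Non-accumulating variant: overwrites the ZA slice without reading it,
; so it could be narrowed to memory(inaccessiblemem: write), allowing a
; write whose result is immediately overwritten to be eliminated.
declare void @za.write.overwrite(i32, <vscale x 4 x i32>) #1

attributes #0 = { nounwind memory(inaccessiblemem: readwrite) }
attributes #1 = { nounwind memory(inaccessiblemem: write) }
```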

Contributor Author

I completely missed this! Thanks for pointing it out. I have fixed it now.


class SME2_ZA_Write_VG4_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
[]>;
[IntrWriteMem, IntrInaccessibleMemOnly]>;
Collaborator

@paulwalker-arm paulwalker-arm Apr 11, 2025

This represents add/sub accumulation instructions and so also read ZA?

Contributor Author

Fixed

@Lukacma Lukacma merged commit efe9cb0 into llvm:main Apr 14, 2025
8 of 11 checks passed
var-const pushed a commit to ldionne/llvm-project that referenced this pull request Apr 17, 2025