llvm · MacDue · Jan 11, 2024 · Jan 8, 2024
@@ -20,12 +20,9 @@ func.func @entry() {
 
   %c123_f32 = arith.constant 123.0 : f32
 
-  %min_elts_s = arith.constant 4 : index
-  %vscale = vector.vscale
-
   // "svl" refers to the Streaming Vector Length and "svl_s" the number of
   // 32-bit elements in a vector of SVL bits.
-  %svl_s = arith.muli %min_elts_s, %vscale : index
+  %svl_s = arm_sme.streaming_vl <word>
 
   %tile_init = bufferization.alloc_tensor(%svl_s, %svl_s) : tensor<?x?xf32>
 

@@ -17,9 +17,7 @@ func.func @entry() {
   %c1_i32 = arith.constant 1 : i32
 
   // Calculate the size of a 32-bit tile, e.g. ZA{n}.s.
-  %vscale = vector.vscale
-  %min_elts_s = arith.constant 4 : index
-  %svl_s = arith.muli %min_elts_s, %vscale : index
+  %svl_s = arm_sme.streaming_vl <word>
   %za_s_size = arith.muli %svl_s, %svl_s : index
 
   // Allocate memory.

@@ -134,12 +134,6 @@ func.func @initialize_memory(%d0 : index, %d1 : index) -> memref<?x?xf32> {
   return %A : memref<?x?xf32>
 }
 
-// This will be made a streaming function by enable-arm-streaming so return SVL.
-func.func @get_svl() -> index {
-  %vscale = vector.vscale
-  return %vscale : index
-}
-
 func.func @entry() {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -148,8 +142,7 @@ func.func @entry() {
 
   // Allocate enough memory to load a 32-bit tile plus a tiny bit more to test
   // non-zero offsets while remaining inbounds.
-  %svl = call @get_svl() : () -> index
-  %svl_s = arith.muli %c4, %svl : index
+  %svl_s = arm_sme.streaming_vl <word>
   %svl_s_plus_two = arith.addi %svl_s, %c2 : index
 
   %A = call @initialize_memory(%svl_s_plus_two, %svl_s_plus_two) : (index, index) -> memref<?x?xf32>

@@ -96,12 +96,6 @@ func.func @initialize_memory(%d0 : index, %d1 : index) -> memref<?x?xf32> {
   return %A : memref<?x?xf32>
 }
 
-// This will be made a streaming function by enable-arm-streaming so return SVL.
-func.func @get_svl() -> index {
-  %vscale = vector.vscale
-  return %vscale : index
-}
-
 func.func @entry() {
   %c0 = arith.constant 0 : index
   %c2 = arith.constant 2 : index
@@ -111,8 +105,7 @@ func.func @entry() {
   //
   // Allocate enough memory to load a 32-bit tile plus a tiny bit more to test
   // non-zero offsets while remaining inbounds.
-  %svl = call @get_svl() : () -> index
-  %svl_s = arith.muli %c4, %svl : index
+  %svl_s = arm_sme.streaming_vl <word>
   %svl_s_plus_two = arith.addi %svl_s, %c2 : index
   %A = call @initialize_memory(%svl_s_plus_two, %svl_s_plus_two) : (index, index) -> memref<?x?xf32>
 

@@ -17,9 +17,7 @@ func.func @entry() {
   %c1_i32 = arith.constant 1 : i32
 
   // Calculate the size of a 32-bit tile, e.g. ZA{n}.s.
-  %vscale = vector.vscale
-  %min_elts_s = arith.constant 4 : index
-  %svl_s = arith.muli %min_elts_s, %vscale : index
+  %svl_s = arm_sme.streaming_vl <word>
   %za_s_size = arith.muli %svl_s, %svl_s : index
 
   // Allocate memory.

@@ -24,12 +24,9 @@ func.func @za0_d_f64() -> i32 {
   %c1_f64 = arith.constant 1.0 : f64
   %c1_index = arith.constant 1 : index
 
-  %min_elts_d = arith.constant 2 : index
-  %vscale = vector.vscale
-
   // "svl" refers to the Streaming Vector Length and "svl_d" the number of
   // 64-bit elements in a vector of SVL bits.
-  %svl_d = arith.muli %min_elts_d, %vscale : index
+  %svl_d = arm_sme.streaming_vl <double>
 
   // Allocate "mem1" and fill each "row" with row number.
   //
@@ -170,13 +167,10 @@ func.func @load_store_two_za_s_tiles() -> i32 {
   %c1_index = arith.constant 1 : index
   %c2_index = arith.constant 2 : index
 
-  %min_elts_s = arith.constant 4 : index
-  %vscale = vector.vscale
-
   // "svl" refers to the Streaming Vector Length and "svl_s" can mean either:
   // * the number of 32-bit elements in a vector of SVL bits.
   // * the number of tile slices (1d vectors) in a 32-bit element tile.
-  %svl_s = arith.muli %min_elts_s, %vscale : index
+  %svl_s = arm_sme.streaming_vl <word>
 
   // Allocate memory for two 32-bit element tiles.
   %size_of_tile = arith.muli %svl_s, %svl_s : index

@@ -14,12 +14,9 @@ func.func @entry() -> i32 {
   %c1_i8 = arith.constant 1 : i8
   %c1_index = arith.constant 1 : index
 
-  %c16 = arith.constant 16 : index
-  %vscale = vector.vscale
-
   // "svl" refers to the Streaming Vector Length and "svl_b" the number of
   // 8-bit elements in a vector of SVL bits.
-  %svl_b = arith.muli %c16, %vscale : index
+  %svl_b = arm_sme.streaming_vl <byte>
 
   // Allocate memory and fill with ones.
   //