Update base for Update on "[ET-VK] Add coop shader for int8 linear"

SS-JIA · SS-JIA · commit 3277f6d35841 · 2025-04-23T11:02:17.000-07:00
Title says it all!

## Changes

* Apply the co-operative shader to vector-matrix computations.

Differential Revision: [D73279548](https://our.internmc.facebook.com/intern/diff/D73279548/)

[ghstack-poisoned]
diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.glsl b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.glsl
@@ -53,7 +53,7 @@ void main() {
   $if SCALES_STORAGE == "buffer":
     const VEC4_T scales = VEC4_T(t_scales[out_col >> 2]);
   $else:
-    const VEC4_T scales = VEC4_T(texelFetch(t_scales, ivec3(out_col >> 2, 0, 0), 0));
+    const VEC4_T scales = VEC4_T(texelFetch(t_scales, ivec2(out_col >> 2, 0), 0));
 
   [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) {
     c[i] = VEC4_T(0.0);
diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.yaml b/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.yaml
@@ -10,7 +10,7 @@ q_8w_linear_tiled:
     IN_STORAGE: texture3d
     OUT_STORAGE: texture3d
     WEIGHT_STORAGE: texture2d
-    SCALES_STORAGE: buffer
+    SCALES_STORAGE: texture2d
     TILE_ROWS: 4
   generate_variant_forall:
     TILE_ROWS:
@@ -21,11 +21,12 @@ q_8w_linear_tiled:
       - VALUE: 6
         SUFFIX: o4x6
   shader_variants:
-    - NAME: q_8w_linear_tiled_texture3d_texture3d_texture2d_float
-    - NAME: q_8w_linear_tiled_buffer_buffer_texture2d_float
+    - NAME: q_8w_linear_tiled_texture3d_texture3d_texture2d_texture2d_float
+    - NAME: q_8w_linear_tiled_buffer_buffer_texture2d_texture2d_float
       IN_STORAGE: buffer
       OUT_STORAGE: buffer
-    - NAME: q_8w_linear_tiled_buffer_buffer_buffer_float
+    - NAME: q_8w_linear_tiled_buffer_buffer_buffer_buffer_float
       IN_STORAGE: buffer
       OUT_STORAGE: buffer
       WEIGHT_STORAGE: buffer
+      SCALES_STORAGE: buffer
diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp
@@ -161,14 +161,19 @@ void add_q_8w_linear_tiled_node(
   ValueRef q_mat2 = prepack_standard_hw_transposed(
       graph, q_mat2_data, q_mat2_storage, utils::kWidthPacked);
 
+  utils::StorageType scales_storage = utils::kTexture2D;
+  if (N > max_extent) {
+    scales_storage = utils::kBuffer;
+  }
   ValueRef scales =
-      prepack_standard(graph, scales_data, utils::kBuffer, utils::kWidthPacked);
+      prepack_standard(graph, scales_data, scales_storage, utils::kWidthPacked);
 
   std::string kernel_name = "q_8w_linear_tiled";
   kernel_name.reserve(kShaderNameReserve);
   add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
   add_storage_type_suffix(kernel_name, graph.storage_type_of(mat1));
   add_storage_type_suffix(kernel_name, graph.storage_type_of(q_mat2));
+  add_storage_type_suffix(kernel_name, graph.storage_type_of(scales));
   add_dtype_suffix(kernel_name, graph.dtype_of(out));
 
   std::vector<int64_t> mat1_sizes = graph.sizes_of(mat1);
@@ -177,6 +182,9 @@ void add_q_8w_linear_tiled_node(
   if (M % 6 == 0) {
     kernel_name += "_o4x6";
     out_tile_nrows = 6;
+  } else if (M % 4 == 0) {
+    kernel_name += "_o4x4";
+    out_tile_nrows = 4;
   } else if (M % 1 == 0) {
     kernel_name += "_o4x1";
     out_tile_nrows = 1;