Fix zero size tensors (#3702)

SS-JIA · facebook-github-bot · commit ab1c8aafc7d5 · 2024-05-22T06:41:39.000-07:00
Summary: Pull Request resolved: #3702 ## Context Dispatching a command buffer with a global work group size that contains 0 is undefined behaviour. On some devices, this can cause the device to be lost. Fix this by setting the global work group size to `{1, 1, 1}` right before dispatching a command buffer if any of its dimensions is 0. Reviewed By: yipjustin Differential Revision: D57655257 fbshipit-source-id: 6209668c960f0e0afb0de0ab8b09c285e2de56b9
diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp
@@ -91,16 +91,25 @@ void Context::register_shader_dispatch(
     const ShaderInfo& shader_descriptor,
     const utils::uvec3& global_workgroup_size) {
   // Adjust the global workgroup size based on the output tile size
+  uint32_t global_wg_w = utils::div_up(
+      global_workgroup_size.data[0u], shader_descriptor.out_tile_size.data[0u]);
+  uint32_t global_wg_h = utils::div_up(
+      global_workgroup_size.data[1u], shader_descriptor.out_tile_size.data[1u]);
+  uint32_t global_wg_d = utils::div_up(
+      global_workgroup_size.data[2u], shader_descriptor.out_tile_size.data[2u]);
+
+  // Submitting a global work group size of 0 is undefined behaviour. If this is
+  // detected then submit a single workgroup instead.
+  if (global_wg_w == 0u || global_wg_h == 0u || global_wg_d == 0u) {
+    global_wg_w = 1u;
+    global_wg_h = 1u;
+    global_wg_d = 1u;
+  }
+
   const utils::uvec3 effective_global_wg = {
-      utils::div_up(
-          global_workgroup_size.data[0u],
-          shader_descriptor.out_tile_size.data[0u]),
-      utils::div_up(
-          global_workgroup_size.data[1u],
-          shader_descriptor.out_tile_size.data[1u]),
-      utils::div_up(
-          global_workgroup_size.data[2u],
-          shader_descriptor.out_tile_size.data[2u]),
+      global_wg_w,
+      global_wg_h,
+      global_wg_d,
   };
 
   cmd_.bind_descriptors(descriptors.get_bind_handle());
diff --git a/backends/vulkan/test/op_tests/utils/codegen.py b/backends/vulkan/test/op_tests/utils/codegen.py
@@ -531,8 +531,11 @@ def gen_graph_build_code(self) -> str:
 
         return graph_build
 
-    def gen_graph_exec_code(self) -> str:
+    def gen_graph_exec_code(self, loop_range: int = 1) -> str:
         graph_exec = ""
+        if loop_range > 1:
+            graph_exec += f"for (int i = 0; i < {loop_range} ; ++i) "
+        graph_exec += "{\n"
         for aten_arg in self.args:
             ref = self.refs[aten_arg.name]
             if ref.is_in:
@@ -544,6 +547,8 @@ def gen_graph_exec_code(self) -> str:
 
         graph_exec += self.declare_vk_out_for(self.refs["out"])
         graph_exec += self.copy_from_staging(self.refs["out"])
+        graph_exec += self.check_graph_out(self.refs["out"])
+        graph_exec += "}\n"
 
         return graph_exec
 
@@ -564,7 +569,6 @@ def gen_op_check_fn(self) -> str:
         op_check_fn_body += self.gen_conditional_skips()
         op_check_fn_body += self.gen_graph_build_code()
         op_check_fn_body += self.gen_graph_exec_code()
-        op_check_fn_body += self.check_graph_out(self.refs["out"])
 
         # Add two level of indent for readability
         op_check_fn_body = re.sub(r"^", "        ", op_check_fn_body, flags=re.M)