Enable zero-size tensors (#3640)

SS-JIA · facebook-github-bot · commit 5c70121323fd · 2024-05-16T14:07:42.000-07:00
Summary: Pull Request resolved: #3640 As title. The approach is slightly different from the one in PyTorch Vulkan: instead of binding no memory, we make a small allocation (a 1-byte buffer or a 1x1x1 image texture). The reason for this change is to account for the possibility that some zero-size tensors are used as inputs while the output is not zero-size. In that case we still need to be able to bind the zero-size tensor to a shader. Reviewed By: yipjustin Differential Revision: D57450473 fbshipit-source-id: 753e0dedf40f4a6ee6153980159fce34b8c41b5e
diff --git a/backends/vulkan/runtime/api/memory/Buffer.cpp b/backends/vulkan/runtime/api/memory/Buffer.cpp
@@ -38,16 +38,18 @@ VulkanBuffer::VulkanBuffer(
       memory_{},
       owns_memory_(allocate_memory),
       handle_(VK_NULL_HANDLE) {
-  // Only allocate memory if the buffer has non-zero size
+  // If the buffer size is 0, allocate a buffer with a size of 1 byte. This is
+  // to ensure that there will be some resource that can be bound to a shader.
   if (size == 0) {
-    return;
+    buffer_properties_.size = 1u;
+    buffer_properties_.mem_range = 1u;
   }
 
   const VkBufferCreateInfo buffer_create_info{
       VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
       nullptr, // pNext
       0u, // flags
-      size, // size
+      buffer_properties_.size, // size
       buffer_properties_.buffer_usage, // usage
       VK_SHARING_MODE_EXCLUSIVE, // sharingMode
       0u, // queueFamilyIndexCount
diff --git a/backends/vulkan/runtime/api/memory/Image.cpp b/backends/vulkan/runtime/api/memory/Image.cpp
@@ -129,11 +129,14 @@ VulkanImage::VulkanImage(
   VmaAllocatorInfo allocator_info{};
   vmaGetAllocatorInfo(allocator_, &allocator_info);
 
-  // If any dims are zero, then no memory will be allocated for the image.
+  // If any dims are zero, then allocate a 1x1x1 image texture. This is to
+  // ensure that there will be some resource that can be bound to a shader.
   if (image_props.image_extents.width == 0 ||
       image_props.image_extents.height == 0 ||
       image_props.image_extents.depth == 0) {
-    return;
+    image_properties_.image_extents.width = 1u;
+    image_properties_.image_extents.height = 1u;
+    image_properties_.image_extents.depth = 1u;
   }
 
   const VkImageCreateInfo image_create_info{
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
@@ -490,6 +490,7 @@ def get_cat_inputs():
     test_suite = VkTestSuite(
         [
             # Cat on Height
+            ([(S1, S1, 3, 5), (S1, S1, 0, 5)], 2),
             ([(S1, S1, 3, 5), (S1, S1, 4, 5)], 2),
             ([(S1, 3, 5), (S1, 4, 5)], 1),
             ([(3, 5), (4, 5)], 0),
@@ -501,6 +502,7 @@ def get_cat_inputs():
             # Cat on Width
             ([(S1, S1, 5, 3), (S1, S1, 5, 4)], 3),
             ([(S1, 5, 3), (S1, 5, 4)], 2),
+            ([(5, 0), (5, 4)], 1),
             ([(5, 3), (5, 4)], 1),
             ([(5, 3), (5, 4), (5, 1)], 1),
             (
@@ -521,6 +523,7 @@ def get_cat_inputs():
                 0,
             ),
             # Cat on Channel
+            ([(S, 5, 4), (0, 5, 4), (S2, 5, 4)], 0),
             ([(S, 5, 4), (S1, 5, 4), (S2, 5, 4)], 0),
             ([(XS, 5, 4), (XS, 5, 4), (S2, 5, 4)], 0),
             ([(XS, S, 5, 4), (XS, S1, 5, 4), (XS, S2, 5, 4)], 1),
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
@@ -929,6 +929,27 @@ def forward(self, x, y, z, w):
             memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
+    def test_vulkan_backend_cat_with_zero_size(self):
+        class TestModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y, z, w):
+                return torch.cat([x, y, z, w], dim=1)
+
+        sample_inputs = (  # y and z are zero-size; the cat output is (3, 9, 2, 7)
+            torch.randn(size=(3, 6, 2, 7), dtype=torch.float32),
+            torch.randn(size=(3, 0, 2, 7), dtype=torch.float32),  # empty on dim 1
+            torch.randn(size=(3, 0, 2, 7), dtype=torch.float32),  # empty on dim 1
+            torch.randn(size=(3, 3, 2, 7), dtype=torch.float32),
+        )
+
+        self.lower_module_and_test_output(
+            TestModule(),
+            sample_inputs,
+            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
+        )
+
     def test_vulkan_backend_slice(self):
         class TestModule(torch.nn.Module):
             def __init__(self):
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -259,6 +259,38 @@ TEST_F(VulkanComputeAPITest, test_buffer_int8) {
   test_storage_buffer_type<int8_t, api::kQInt8>(16);
 }
 
+TEST_F(VulkanComputeAPITest, test_zero_size_tensor) {
+  // Performs a + b -> c where all three tensors have zero elements
+
+  std::vector<int64_t> sizes = {0, 5, 7}; // 0 * 5 * 7 = 0 elements
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+
+  // Fill input tensors (2.5 + 1.5 gives the 4.0 checked below)
+  fill_vtensor(a, 2.5f);
+  fill_vtensor(b, 1.5f);
+
+  // a + b -> c
+  record_binary_op(api::context(), "add", a, b, c);
+
+  // Read c back into a host-side vector
+  std::vector<float> data_out = extract_vtensor(c);
+
+  // Each tensor must report zero elements and zero bytes
+  ASSERT_TRUE(a.numel() == 0);
+  ASSERT_TRUE(b.numel() == 0);
+  ASSERT_TRUE(c.numel() == 0);
+  ASSERT_TRUE(a.nbytes() == 0);
+  ASSERT_TRUE(b.nbytes() == 0);
+  ASSERT_TRUE(c.nbytes() == 0);
+
+  // Check output; presumably data_out is empty so this is a no-op (confirm)
+  for (size_t i = 0; i < data_out.size(); ++i) {
+    CHECK_VALUE(data_out, i, 4.0f);
+  }
+}
+
 TEST_F(VulkanComputeAPITest, texture_add_sanity_check) {
   // Simple test that performs a + b -> c