Skip to content

Commit b02aa5e

Browse files
wbigat authored and pytorchmergebot committed
[Feature] storage resize_ support custom device. (pytorch#99882)
Fixes pytorch#99326. Supports storage resize_ for a custom device by calling dispatched tensor operations. @ezyang this PR is another case that was brought up in issue pytorch#99326; please take a moment to review this change. Pull Request resolved: pytorch#99882. Approved by: https://github.com/ezyang
1 parent 9834358 commit b02aa5e

File tree

3 files changed

+56
-0
lines changed

3 files changed

+56
-0
lines changed

test/cpp_extensions/open_registration_extension.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,20 @@ bool custom_is_pinned(const at::Tensor& self, c10::optional<at::Device> device)
128128
return false;
129129
}
130130

131+
// In-place resize for tensors on the PrivateUse1 (custom) backend.
// Marks the tensor contiguous with the new sizes and grows the backing
// storage's byte count when the new shape needs more room; shrinking a
// tensor never releases or shrinks the existing allocation. The
// memory-format argument is accepted for schema compatibility but unused.
const at::Tensor& custom_resize_(const at::Tensor& self, at::IntArrayRef size,
    c10::optional<at::MemoryFormat> optional_memory_format) {
  auto* impl = self.unsafeGetTensorImpl();
  impl->set_sizes_contiguous(size);

  // Bytes required for a contiguous layout of `size`, accounting for the
  // tensor's storage offset.
  const auto elem_bytes = impl->dtype().itemsize();
  const auto required_bytes = at::detail::computeStorageNbytesContiguous(
      size, elem_bytes, impl->storage_offset());

  const auto& storage = impl->unsafe_storage();
  if (required_bytes > storage.nbytes()) {
    // Grow-only: record the larger byte count on the storage impl.
    storage.unsafeGetStorageImpl()->set_nbytes(required_bytes);
  }
  return self;
}
144+
131145
// This macro does the heavy lifting.
132146
// With TORCH_LIBRARY_IMPL, you can register custom kernels for your backend.
133147
// For open registration, we're registering all of our kernels to the PrivateUse1 dispatch key.
@@ -146,6 +160,7 @@ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
146160
m.impl("set_.source_Storage", &custom_set_source_Storage);
147161
m.impl("_pin_memory", &custom__pin_memory);
148162
m.impl("is_pinned", &custom_is_pinned);
163+
m.impl("resize_", &custom_resize_);
149164
}
150165

151166
// This basic implementation doesn't bother dealing with different device indices

test/test_cpp_extensions_open_device_registration.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,17 @@ def test_open_device_serialization():
264264
foo_storage = torch.serialization.default_restore_location(cpu_storage, 'foo:0')
265265
self.assertTrue(foo_storage.is_foo)
266266

267+
def test_open_device_storage_resize():
    # Nested helper like its siblings (e.g. test_open_device_serialization):
    # takes no parameters and captures the enclosing test's `self` via
    # closure — the bare call `test_open_device_storage_resize()` below
    # would raise TypeError if `self` were a positional parameter.
    torch.utils.rename_privateuse1_backend('foo')
    cpu_tensor = torch.randn([8])
    foo_tensor = cpu_tensor.foo()
    foo_storage = foo_tensor.storage()
    self.assertTrue(foo_storage.size() == 8)
    # Resizing to the current size must be a no-op on the reported size.
    foo_storage.resize_(8)
    self.assertTrue(foo_storage.size() == 8)
    # 8**29 bytes (2**87) cannot be represented as int64, so the C++
    # resize path is expected to raise an overflow RuntimeError.
    with self.assertRaisesRegex(RuntimeError, 'overflow'):
        foo_storage.resize_(8**29)
277+
267278
test_base_device_registration()
268279
test_before_common_registration()
269280
test_common_registration()
@@ -274,6 +285,8 @@ def test_open_device_serialization():
274285
test_open_device_storage()
275286
test_open_device_storage_pin_memory()
276287
test_open_device_serialization()
288+
test_open_device_storage_resize()
289+
277290

278291
if __name__ == "__main__":
279292
common.run_tests()

torch/csrc/StorageMethods.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,34 @@ static PyObject* THPStorage_resize_(PyObject* self, PyObject* number_arg) {
134134
const auto size_bytes = static_cast<size_t>(size_bytes_i);
135135
at::native::resize_bytes_cuda(storage.unsafeGetStorageImpl(), size_bytes);
136136
#endif
137+
} else if (device_type == at::kPrivateUse1) {
138+
ptrdiff_t size_bytes_i = newsize;
139+
TORCH_CHECK(
140+
!c10::overflows<int64_t>(size_bytes_i),
141+
"Requested storage size (",
142+
size_bytes_i,
143+
") cannot be represented as a int64_t");
144+
const auto size_bytes = static_cast<int64_t>(size_bytes_i);
145+
void* original_data_ptr = storage.data_ptr().get();
146+
147+
auto src_option =
148+
c10::TensorOptions().device(storage.device()).dtype(at::kByte);
149+
auto src_tensor = at::empty({0}, {}, src_option).set_(storage);
150+
src_tensor.resize_({size_bytes});
151+
152+
// When using resize_ to replace resize_bytes_xxx, in some cases
153+
// the original data_ptr is still returned, which is an inconsistent
154+
// behavior when compared to resize_bytes_xxx. For these cases,
155+
// an additional memory copy and update for storage are required.
156+
if (original_data_ptr == src_tensor.storage().data_ptr().get()) {
157+
auto new_tensor = at::empty(src_tensor.sizes(), src_tensor.options());
158+
new_tensor.copy_(src_tensor);
159+
storage.set_data_ptr_noswap(
160+
std::move(const_cast<at::DataPtr&>(new_tensor.storage().data_ptr())));
161+
storage.unsafeGetStorageImpl()->set_allocator(
162+
new_tensor.storage().unsafeGetStorageImpl()->allocator());
163+
storage.set_nbytes(new_tensor.storage().nbytes());
164+
}
137165
} else {
138166
TORCH_CHECK(
139167
false,

0 commit comments

Comments
 (0)