Skip to content

Commit 610f333

Browse files
JacobSzwejbka authored and facebook-github-bot committed
Support mutable tensors in TensorParser (#4713)
Summary: Pull Request resolved: #4713 Call the new method on program in tensor parser. Add a friend class so it can access it. Reviewed By: dvorjackz Differential Revision: D61222257
1 parent 6efc222 commit 610f333

File tree

5 files changed

+112
-9
lines changed

5 files changed

+112
-9
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ if(EXECUTORCH_BUILD_FLATC)
428428
# exir lets users set the alignment of tensor data embedded in the flatbuffer,
429429
# and some users need an alignment larger than the default, which is typically
430430
# 32.
431-
target_compile_definitions(flatc PRIVATE FLATBUFFERS_MAX_ALIGNMENT=1024)
431+
target_compile_definitions(flatc PRIVATE FLATBUFFERS_MAX_ALIGNMENT=2048)
432432
endif()
433433
if(NOT FLATC_EXECUTABLE)
434434
message(

runtime/executor/program.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ namespace testing {
3535
class ProgramTestFriend;
3636
} // namespace testing
3737

38+
namespace deserialization {
39+
// Provides Tensor deserializaiton access to private Program methods.
40+
class TensorParser;
41+
} // namespace deserialization
42+
3843
/**
3944
* A deserialized ExecuTorch program binary.
4045
*/
@@ -194,6 +199,7 @@ class Program final {
194199
friend class BackendDelegate;
195200
friend class Executor;
196201
friend class Method;
202+
friend class deserialization::TensorParser;
197203
friend class testing::ProgramTestFriend;
198204

199205
const executorch_flatbuffer::Program* get_internal_program() const {

runtime/executor/tensor_parser_exec_aten.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@ namespace torch {
1919
namespace executor {
2020
namespace deserialization {
2121

22+
// Provides access to private Program methods.
23+
class TensorParser final {
24+
public:
25+
__ET_NODISCARD static Error load_mutable_subsegment_into(
26+
const Program* program,
27+
size_t mutable_data_segments_index,
28+
size_t offset_index,
29+
size_t size,
30+
void* buffer) {
31+
return program->load_mutable_subsegment_into(
32+
mutable_data_segments_index, offset_index, size, buffer);
33+
}
34+
};
35+
2236
namespace {
2337

2438
// Retrieve the buffer specified by the allocation_info
@@ -94,14 +108,17 @@ __ET_NODISCARD Result<void*> getTensorDataPtr(
94108

95109
// Memory Planned, with initial state
96110
if (data_buffer_idx > 0 && allocation_info != nullptr) {
97-
// Stub case for now.
98-
99-
// Get memory planned data pointer
100-
101-
// Call something like program.load_into_buffer(s_tensor->segment_idx,
102-
// s_tensor->data_buffer_idx, mem_planned_buffer, nbytes)
111+
auto planned_ptr = getMemPlannedPtr(allocation_info, nbytes, allocator);
112+
if (!planned_ptr.ok()) {
113+
return planned_ptr.error();
114+
}
115+
auto err = TensorParser::load_mutable_subsegment_into(
116+
program, 0, s_tensor->data_buffer_idx(), nbytes, planned_ptr.get());
103117

104-
return Error::NotImplemented;
118+
if (err != Error::Ok) {
119+
return err;
120+
}
121+
return planned_ptr;
105122

106123
// Constant
107124
} else if (data_buffer_idx > 0 && allocation_info == nullptr) {

runtime/executor/test/tensor_parser_test.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,83 @@ TEST_F(TensorParserTest, TestModuleAddHalf) {
120120
torch::executor::ScalarType::Half,
121121
sizeof(torch::executor::Half));
122122
}
123+
124+
TEST_F(TensorParserTest, TestMutableState) {
125+
// Load the serialized ModuleSimpleTrain data.
126+
const char* path = std::getenv("ET_MODULE_SIMPLE_TRAIN_PATH");
127+
Result<FileDataLoader> train_loader = FileDataLoader::from(path);
128+
ASSERT_EQ(train_loader.error(), Error::Ok);
129+
130+
Result<Program> program =
131+
Program::load(&train_loader.get(), Program::Verification::Minimal);
132+
EXPECT_EQ(program.error(), Error::Ok);
133+
134+
ManagedMemoryManager mmm(kDefaultNonConstMemBytes, kDefaultRuntimeMemBytes);
135+
ManagedMemoryManager mmm_copy(
136+
kDefaultNonConstMemBytes, kDefaultRuntimeMemBytes);
137+
138+
const executorch_flatbuffer::Program* internal_program =
139+
ProgramTestFriend::GetInternalProgram(&program.get());
140+
executorch_flatbuffer::ExecutionPlan* execution_plan =
141+
internal_program->execution_plan()->GetMutableObject(0);
142+
auto flatbuffer_values = execution_plan->values();
143+
144+
size_t num_mutable_tensors = 0;
145+
for (size_t i = 0; i < flatbuffer_values->size(); ++i) {
146+
auto serialization_value = flatbuffer_values->Get(i);
147+
if (serialization_value->val_type() ==
148+
executorch_flatbuffer::KernelTypes::Tensor &&
149+
serialization_value->val_as_Tensor()->allocation_info() != nullptr &&
150+
serialization_value->val_as_Tensor()->data_buffer_idx() > 0) {
151+
num_mutable_tensors++;
152+
Result<torch::executor::Tensor> tensor = parseTensor(
153+
&program.get(), &mmm.get(), serialization_value->val_as_Tensor());
154+
torch::executor::Tensor t = tensor.get();
155+
float loaded_value = t.const_data_ptr<float>()[0];
156+
ASSERT_NE(nullptr, t.const_data_ptr());
157+
ASSERT_NE(t.mutable_data_ptr<float>()[0], 0.5);
158+
t.mutable_data_ptr<float>()[0] = 0.5;
159+
ASSERT_EQ(
160+
t.mutable_data_ptr<float>()[0],
161+
0.5); // 0.5 can be represented perfectly by float so EQ and NE work
162+
// fine here. Any power of 2 rational can be perfectly
163+
// represented. See dyadic rationals for more info.
164+
165+
// Load the same tensor using the same mem manager and show the value is
166+
// updated again.
167+
Result<torch::executor::Tensor> tensor1_alias = parseTensor(
168+
&program.get(), &mmm.get(), serialization_value->val_as_Tensor());
169+
torch::executor::Tensor t2 = tensor.get();
170+
ASSERT_NE(t2.mutable_data_ptr<float>()[0], 0.5);
171+
172+
// Show the tensors are equivalent
173+
ASSERT_EQ(t.const_data_ptr(), t2.const_data_ptr());
174+
t.mutable_data_ptr<float>()[0] = 0.5;
175+
176+
// Load the same tensor using a different mem manager and show the value
177+
// is not the same as t1.
178+
Result<torch::executor::Tensor> tensor_new = parseTensor(
179+
&program.get(),
180+
&mmm_copy.get(),
181+
serialization_value->val_as_Tensor());
182+
torch::executor::Tensor t3 = tensor_new.get();
183+
ASSERT_NE(t3.mutable_data_ptr<float>()[0], 0.5);
184+
ASSERT_NE(t3.const_data_ptr(), t.const_data_ptr());
185+
ASSERT_EQ(loaded_value, t3.const_data_ptr<float>()[0]);
186+
187+
// Hard check the first byte of the serialized data.
188+
// 232 and 210 comes from inspecting the file itself. The
189+
// file is seeded so this value should be stable.
190+
if (num_mutable_tensors == 1) {
191+
const uint8_t* byte_data =
192+
reinterpret_cast<const uint8_t*>(t3.const_data_ptr());
193+
ASSERT_EQ(byte_data[0], 232);
194+
} else if (num_mutable_tensors == 2) {
195+
const uint8_t* byte_data =
196+
reinterpret_cast<const uint8_t*>(t3.const_data_ptr());
197+
ASSERT_EQ(byte_data[0], 210);
198+
}
199+
}
200+
}
201+
ASSERT_EQ(num_mutable_tensors, 2);
202+
}

schema/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ function(generate_program_schema _schema_srcs _schema_name)
4949
# and some users need an alignment larger than the default, which is typically
5050
# 32.
5151
target_compile_definitions(
52-
${_schema_name} INTERFACE FLATBUFFERS_MAX_ALIGNMENT=1024)
52+
${_schema_name} INTERFACE FLATBUFFERS_MAX_ALIGNMENT=2048)
5353

5454
target_include_directories(
5555
${_schema_name}

0 commit comments

Comments
 (0)