Skip to content

Commit 6ab0019

Browse files
committed
[ExecuTorch][Weight Sharing][XNNPACK] load named data map data for xnnpack
Pull Request resolved: #9152 If data is serialized into the NamedDataMap, then we overload getConstantDataPtr to retrieve the data from the named data map. This is done in a backwards-compatible way: if no data is serialized into the named data map, then we still load the data from the flatbuffer payload. Since the runtime change here is being made before the AoT changes, all CI on this diff by itself should test that the changes made here are backwards compatible. Note: We do not resolve runtime memory usage at this point. WeightCache will be implemented in the next diff, meaning that if we load via the same key across different methods, we still pack twice and allocate two instances for the packed weights. ghstack-source-id: 271732048 @exported-using-ghexport Differential Revision: [D70315209](https://our.internmc.facebook.com/intern/diff/D70315209/)
1 parent aef99e3 commit 6ab0019

File tree

5 files changed

+62
-12
lines changed

5 files changed

+62
-12
lines changed

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <executorch/backends/xnnpack/runtime/XNNHeader.h>
1111
#include <executorch/backends/xnnpack/serialization/schema_generated.h>
1212
#include <executorch/extension/threadpool/threadpool.h>
13-
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
13+
#include <executorch/runtime/executor/pte_data_map.h>
1414
#include <unordered_map>
1515

1616
#pragma clang diagnostic ignored "-Wmissing-prototypes"
@@ -22,7 +22,9 @@ namespace xnnpack {
2222
namespace delegate {
2323

2424
using executorch::runtime::Error;
25+
using executorch::runtime::FreeableBuffer;
2526
using executorch::runtime::MemoryAllocator;
27+
using executorch::runtime::NamedDataMap;
2628
using executorch::runtime::Result;
2729

2830
/*
@@ -48,6 +50,7 @@ class CompileAllocator {
4850
using ValuePtr = const fb_xnnpack::XValue*;
4951
using NodePtr = const fb_xnnpack::XNode*;
5052
using GraphPtr = const fb_xnnpack::XNNGraph*;
53+
using ConstantDataOffsetPtr = const fb_xnnpack::ConstantDataOffset*;
5154
using DataType = fb_xnnpack::XNNDatatype;
5255

5356
// Type for define node function. This is the function signature
@@ -162,7 +165,9 @@ data associated with the tensor value, then returns nullptr.
162165
const uint8_t* getConstantDataPtr(
163166
const fb_xnnpack::XNNTensorValue* tensor_value,
164167
GraphPtr flatbuffer_graph,
165-
const uint8_t* constant_data_ptr) {
168+
const uint8_t* constant_data_ptr,
169+
const NamedDataMap* named_data_map,
170+
std::vector<FreeableBuffer>& loaded_buffers_from_map) {
166171
auto buffer_idx = tensor_value->constant_buffer_idx();
167172
if (buffer_idx) {
168173
if (!constant_data_ptr) {
@@ -171,10 +176,31 @@ const uint8_t* getConstantDataPtr(
171176
const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
172177
return constant_buffer[buffer_idx]->storage()->data();
173178
} else {
174-
const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
175-
uint64_t constant_data_offset =
176-
constant_data_offsets[buffer_idx]->offset();
177-
return constant_data_ptr + constant_data_offset;
179+
ConstantDataOffsetPtr constant_data_offset =
180+
flatbuffer_graph->constant_data()->Get(buffer_idx);
181+
uint64_t offset = constant_data_offset->offset();
182+
183+
bool has_named_key = flatbuffers::IsFieldPresent(
184+
constant_data_offset, fb_xnnpack::ConstantDataOffset::VT_NAMED_KEY);
185+
// If there is no tensor name
186+
if (!has_named_key) {
187+
return constant_data_ptr + offset;
188+
} else {
189+
const std::string& data_name = constant_data_offset->named_key()->str();
190+
Result<FreeableBuffer> buffer =
191+
named_data_map->get_data(data_name.c_str());
192+
if (!buffer.ok()) {
193+
ET_LOG(
194+
Error,
195+
"Failed to get constant data for key %s",
196+
data_name.c_str());
197+
return nullptr;
198+
}
199+
const uint8_t* data_ptr =
200+
static_cast<const uint8_t*>(buffer.get().data());
201+
loaded_buffers_from_map.push_back(std::move(buffer.get()));
202+
return data_ptr;
203+
}
178204
}
179205
}
180206

@@ -194,7 +220,9 @@ Error defineTensor(
194220
const uint8_t* constant_data_ptr,
195221
std::vector<uint32_t>& input_ids,
196222
std::vector<uint32_t>& output_ids,
197-
CompileAllocator& allocator) {
223+
CompileAllocator& allocator,
224+
const NamedDataMap* named_data_map,
225+
std::vector<FreeableBuffer>& loaded_buffers_from_map) {
198226
const fb_xnnpack::XNNTensorValue* tensor_value = nullptr;
199227
const fb_xnnpack::XNNQuantizedTensorValue* qtensor_value = nullptr;
200228

@@ -231,8 +259,12 @@ Error defineTensor(
231259

232260
// Get Pointer to constant data from flatbuffer, if its non-constant
233261
// it is a nullptr
234-
const uint8_t* buffer_ptr =
235-
getConstantDataPtr(tensor_value, flatbuffer_graph, constant_data_ptr);
262+
const uint8_t* buffer_ptr = getConstantDataPtr(
263+
tensor_value,
264+
flatbuffer_graph,
265+
constant_data_ptr,
266+
named_data_map,
267+
loaded_buffers_from_map);
236268

237269
xnn_status status;
238270
// The type we might have to convert to
@@ -1968,6 +2000,7 @@ ET_NODISCARD Error XNNCompiler::compileModel(
19682000
size_t num_bytes,
19692001
XNNExecutor* executor,
19702002
MemoryAllocator* runtime_allocator,
2003+
const NamedDataMap* named_data_map,
19712004
xnn_workspace_t workspace) {
19722005
Result<XNNHeader> header = XNNHeader::Parse(buffer_pointer, num_bytes);
19732006
const uint8_t* flatbuffer_data = nullptr;
@@ -2036,6 +2069,7 @@ ET_NODISCARD Error XNNCompiler::compileModel(
20362069
std::vector<uint32_t> input_ids;
20372070
std::vector<uint32_t> output_ids;
20382071
Error err = Error::Ok;
2072+
std::vector<FreeableBuffer> loaded_buffers_from_map;
20392073
for (auto value : *flatbuffer_graph->xvalues()) {
20402074
err = defineTensor(
20412075
subgraph.get(),
@@ -2045,7 +2079,9 @@ ET_NODISCARD Error XNNCompiler::compileModel(
20452079
constant_data,
20462080
input_ids,
20472081
output_ids,
2048-
compile_allocator);
2082+
compile_allocator,
2083+
named_data_map,
2084+
loaded_buffers_from_map);
20492085

20502086
if (err != Error::Ok) {
20512087
return err;

backends/xnnpack/runtime/XNNCompiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class XNNCompiler {
3030
size_t num_bytes,
3131
XNNExecutor* executor,
3232
executorch::runtime::MemoryAllocator* runtime_allocator,
33+
const executorch::runtime::NamedDataMap* named_data_map,
3334
xnn_workspace_t workspace);
3435
};
3536

backends/xnnpack/runtime/XNNPACKBackend.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <executorch/runtime/backend/interface.h>
1111
#include <executorch/runtime/core/error.h>
1212
#include <executorch/runtime/core/evalue.h>
13-
#include <executorch/runtime/platform/profiler.h>
13+
#include <executorch/runtime/executor/pte_data_map.h>
1414

1515
#include <memory>
1616
#include <mutex>
@@ -29,6 +29,7 @@ using executorch::runtime::DelegateHandle;
2929
using executorch::runtime::Error;
3030
using executorch::runtime::EValue;
3131
using executorch::runtime::FreeableBuffer;
32+
using executorch::runtime::NamedDataMap;
3233
using executorch::runtime::Result;
3334

3435
class XnnpackBackend final : public ::executorch::runtime::BackendInterface {
@@ -79,13 +80,14 @@ class XnnpackBackend final : public ::executorch::runtime::BackendInterface {
7980
return Error::MemoryAllocationFailed;
8081
}
8182

83+
const NamedDataMap* named_data_map = context.get_named_data_map();
84+
8285
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
8386
// This is needed to serialize access to xnn_create_runtime which is not
8487
// thread safe. This can happen when multiple threads call init() on
8588
// the same backend instance.
8689
const std::lock_guard<std::mutex> lock(workspace_mutex_);
8790
#endif
88-
8991
// Executor has been allocated but not constructed, ensure that runtime_ is
9092
// nullptr by constructing it in place here. NOTE: Since we use placement
9193
// new and since this type is not trivially destructible, we must call the
@@ -96,6 +98,7 @@ class XnnpackBackend final : public ::executorch::runtime::BackendInterface {
9698
processed->size(),
9799
executor,
98100
context.get_runtime_allocator(),
101+
named_data_map,
99102
workspace_.get());
100103
// This backend does not need its processed data after compiling the model.
101104
processed->Free();

backends/xnnpack/serialization/runtime_schema.fbs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,11 +320,20 @@ table XNNLeakyReLU {
320320
table ConstantDataOffset {
321321
// Constant data offsets are relative to the constant data base offset provided
322322
// in the XNNPACKHeader.
323+
// named_key and offset are mutually exclusive, meaning only one of these values
324+
// is valid. If the named key is a non-empty string, then the offset must be UINT64_MAX.
325+
// If the offset is not UINT64_MAX, then the named key must be an empty string.
323326
offset: uint64;
324327

325328
// The size in bytes of valid data starting at the offset. The constant data
326329
// may be followed by padding before the next piece of constant data
327330
size: uint64;
331+
332+
// Unique string id used to query the data from the named data store.
333+
// named_key and offset are mutually exclusive, meaning only one of these values
334+
// is valid. If the named key is a non-empty string, then the offset must be UINT64_MAX.
335+
// If the offset is not UINT64_MAX, then the named key must be an empty string.
336+
named_key: string;
328337
}
329338

330339
table XNNGraph {

backends/xnnpack/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def define_common_targets():
6060
"//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header",
6161
"//executorch/extension/threadpool:threadpool",
6262
"//executorch/runtime/core/exec_aten/util:tensor_util",
63+
"//executorch/runtime/executor:pte_data_map"
6364
],
6465
# XnnpackBackend.cpp needs to compile with executor as whole
6566
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)

0 commit comments

Comments
 (0)