Handle XNNHeader in XNNPACK Runtime (#1543)

mcr229 · facebook-github-bot · commit 77defc63a907 · 2024-01-11T15:10:24.000-08:00
Summary:

We add XNNHeader parsing on the runtime side to handle the XNNHeader that was newly introduced on the ahead-of-time (AOT) side. XNNHeader records the offsets and sizes of the flatbuffer payload and the constant data payload so that both are accessible to the XNNCompiler.

Note that on the serialization side we have not yet switched our serialization method to `serialize_xnnpack_binary`, so models do not yet use the new serialization format. However, the passing tests demonstrate backward compatibility (BC): old models are still able to run on this new runtime.

Passing tests here show that the header magic correctly distinguishes the XNNHeader from the flatbuffer header.

Reviewed By: digantdesai

Differential Revision: D52556131
diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
+#include <executorch/backends/xnnpack/runtime/XNNHeader.h>
 #include <executorch/backends/xnnpack/schema_generated.h>
 #include <executorch/backends/xnnpack/threadpool/threadpool.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
@@ -103,6 +104,34 @@ std::vector<T> flatbufferDimsToVector(
   return dims_data;
 }
 
+/**
+Returns the constant data pointer for `tensor_value`, or nullptr if its
+constant_buffer_idx is 0. With a null `constant_data_ptr` the data comes from
+the flatbuffer's constant_buffer (deprecated); otherwise it is
+`constant_data_ptr` plus the offset in the graph's constant_data entry.
+*/
+const uint8_t* getConstantDataPtr(
+    const fb_xnnpack::XNNTensorValue* tensor_value,
+    GraphPtr flatbuffer_graph,
+    const uint8_t* constant_data_ptr) {
+  auto buffer_idx = tensor_value->constant_buffer_idx();
+  if (buffer_idx) {
+    if (!constant_data_ptr) {
+      // TODO(T172265611): Remove constant_buffer in flatbuffer path after BC
+      // window
+      const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
+      return constant_buffer[buffer_idx]->storage()->data();
+    } else { // NOTE(review): the offset is not range-checked here
+      const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
+      uint64_t constant_data_offset =
+          constant_data_offsets[buffer_idx]->offset();
+      return constant_data_ptr + constant_data_offset;
+    }
+  }
+  // buffer_idx == 0: no constant data is associated with this tensor.
+  return nullptr;
+}
+
 /**
 Define serialized tensor value into
 the subgraph. While also keeping track of the remapped ids from
@@ -113,6 +142,7 @@ Error defineTensor(
     std::unordered_map<uint32_t, uint32_t>& remapped_ids,
     ValuePtr value,
     GraphPtr flatbuffer_graph,
+    const uint8_t* constant_data_ptr,
     XNNExecutor* executor,
     MemoryAllocator* runtime_allocator) {
   const fb_xnnpack::XNNTensorValue* tensor_value = nullptr;
@@ -151,11 +181,9 @@ Error defineTensor(
 
   // Get Pointer to constant data from flatbuffer, if its non-constant
   // it is a nullptr
-  const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
-  auto buffer_idx = tensor_value->constant_buffer_idx();
-  const auto buffer_ptr = buffer_idx == 0
-      ? nullptr
-      : constant_buffer[buffer_idx]->storage()->data();
+  const uint8_t* buffer_ptr =
+      getConstantDataPtr(tensor_value, flatbuffer_graph, constant_data_ptr);
+
   xnn_status status;
   // The type we might have to convert to
   auto dq_datatype = getDataType(tensor_value->dq_datatype());
@@ -1429,14 +1457,31 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
     size_t num_bytes,
     XNNExecutor* executor,
     MemoryAllocator* runtime_allocator) {
+  Result<XNNHeader> header = XNNHeader::Parse(buffer_pointer, num_bytes);
+  const uint8_t* flatbuffer_data = nullptr;
+  const uint8_t* constant_data = nullptr;
+
+  // Ok: XNNHeader present. NotFound: legacy flatbuffer-only data. Else: error.
+  if (header.ok()) {
+    flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
+        header->flatbuffer_offset;
+    constant_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
+        header->constant_data_offset;
+  } else if (header.error() == Error::NotFound) {
+    flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer);
+  } else {
+    ET_LOG(Error, "XNNHeader may be corrupt");
+    return header.error();
+  }
+
   ET_CHECK_OR_RETURN_ERROR(
-      fb_xnnpack::XNNGraphBufferHasIdentifier(buffer_pointer),
+      fb_xnnpack::XNNGraphBufferHasIdentifier(flatbuffer_data),
       DelegateInvalidCompatibility,
       "XNNPACK Delegate Serialization Format version identifier '%.4s' != expected '%.4s'",
-      flatbuffers::GetBufferIdentifier(buffer_pointer),
+      flatbuffers::GetBufferIdentifier(flatbuffer_data),
       fb_xnnpack::XNNGraphIdentifier());
 
-  auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(buffer_pointer);
+  auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(flatbuffer_data);
   // initialize xnnpack
   xnn_status status = xnn_initialize(/*allocator =*/nullptr);
   ET_CHECK_OR_RETURN_ERROR(
@@ -1476,6 +1521,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
         remapped_ids,
         value,
         flatbuffer_graph,
+        constant_data,
         executor,
         runtime_allocator);
 
diff --git a/backends/xnnpack/runtime/XNNHeader.cpp b/backends/xnnpack/runtime/XNNHeader.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/xnnpack/runtime/XNNHeader.h>
+
+#include <cstring>
+
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/result.h>
+
+#pragma clang diagnostic ignored "-Wdeprecated"
+
+namespace torch {
+namespace executor {
+namespace xnnpack {
+namespace delegate {
+
+namespace {
+/// Decodes data[0..7] as a little-endian uint64_t; caller guarantees 8 bytes.
+uint64_t GetUInt64LE(const uint8_t* data) {
+  return (uint64_t)data[0] | ((uint64_t)data[1] << 8) |
+      ((uint64_t)data[2] << 16) | ((uint64_t)data[3] << 24) |
+      ((uint64_t)data[4] << 32) | ((uint64_t)data[5] << 40) |
+      ((uint64_t)data[6] << 48) | ((uint64_t)data[7] << 56);
+}
+
+/// Decodes data[0..3] as a little-endian uint32_t; caller guarantees 4 bytes.
+uint32_t GetUInt32LE(const uint8_t* data) {
+  return (uint32_t)data[0] | ((uint32_t)data[1] << 8) |
+      ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 24);
+}
+
+} // namespace
+/// Reads the XNNHeader fields from `data`, or reports why none could be read.
+Result<XNNHeader> XNNHeader::Parse(const void* data, size_t size) {
+  const uint8_t* header_data = (const uint8_t*)data;
+  // Fewer than kMinSize bytes cannot contain every header field.
+  if (size < XNNHeader::kMinSize) {
+    return Error::InvalidArgument;
+  }
+  // A magic mismatch means no XNNHeader is present at the head of `data`.
+  const uint8_t* magic_start = header_data + XNNHeader::kMagicOffset;
+  if (std::memcmp(magic_start, XNNHeader::kMagic, XNNHeader::kMagicSize) != 0) {
+    return Error::NotFound;
+  }
+  // Each field is little-endian at a fixed offset from the start of `data`.
+  uint32_t flatbuffer_offset =
+      GetUInt32LE(header_data + XNNHeader::kFlatbufferDataOffsetOffset);
+
+  uint32_t flatbuffer_size =
+      GetUInt32LE(header_data + XNNHeader::kFlatbufferDataSizeOffset);
+
+  uint32_t constant_data_offset =
+      GetUInt32LE(header_data + XNNHeader::kConstantDataOffsetOffset);
+
+  uint64_t constant_data_size =
+      GetUInt64LE(header_data + XNNHeader::kConstantDataSizeOffset);
+  // NOTE(review): fields are returned as read, with no range validation.
+  return XNNHeader{
+      flatbuffer_offset,
+      flatbuffer_size,
+      constant_data_offset,
+      constant_data_size};
+}
+
+// Out-of-class definition of kMagic; required pre-C++17 when it is odr-used.
+constexpr char XNNHeader::kMagic[kMagicSize];
+
+} // namespace delegate
+} // namespace xnnpack
+} // namespace executor
+} // namespace torch
diff --git a/backends/xnnpack/runtime/XNNHeader.h b/backends/xnnpack/runtime/XNNHeader.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/result.h>
+
+namespace torch {
+namespace executor {
+namespace xnnpack {
+namespace delegate {
+
+/**
+ * An extended XNNPACK header that is embedded before the flatbuffer payload.
+ * It records the offsets and sizes of the flatbuffer and constant data.
+ */
+struct XNNHeader {
+  /**
+   * Minimum header size in bytes: kConstantDataSizeOffset plus the 8-byte
+   * constant data size. Callers must provide at least this many bytes.
+   */
+  static constexpr size_t kMinSize = 30;
+
+  /**
+   * The offset of the magic bytes. It matches the offset of the flatbuffer
+   * file identifier, so the same bytes tell us whether the data starts with a
+   * flatbuffer header or with the wrapper header introduced here.
+   */
+  static constexpr size_t kMagicOffset = 4;
+
+  /**
+   * The magic bytes that identify the header.
+   *
+   * This is the canonical definition of the expected value. If the header
+   * layout ever changes in a compatibility-breaking way, increment the digits
+   * in the magic. But, doing so will prevent older binaries from recognizing
+   * the presence of the header. The compatibility-preserving way to make
+   * changes is to increase the header's length field and add new fields at the
+   * end.
+   */
+  static constexpr size_t kMagicSize = 4;
+  static constexpr char kMagic[kMagicSize] = {'X', 'H', '0', '0'};
+
+  /**
+   * The size in bytes of the header length field. We store 2 bytes for the
+   * header length.
+   */
+  static constexpr size_t kHeaderLengthSize = 2;
+
+  /**
+   * The expected location of the header length field relative to the beginning
+   * of the header.
+   */
+  static constexpr size_t kHeaderLengthOffset =
+      XNNHeader::kMagicOffset + XNNHeader::kMagicSize;
+
+  /**
+   * The expected location of the flatbuffer data offset field relative to the
+   * beginning of the header.
+   */
+  static constexpr size_t kFlatbufferDataOffsetOffset =
+      kHeaderLengthOffset + sizeof(uint16_t);
+
+  /**
+   * The expected location of the flatbuffer data size field relative to the
+   * beginning of the header.
+   */
+  static constexpr size_t kFlatbufferDataSizeOffset =
+      kFlatbufferDataOffsetOffset + sizeof(uint32_t);
+
+  /**
+   * The expected location of the constant data offset field relative to the
+   * beginning of the header.
+   */
+  static constexpr size_t kConstantDataOffsetOffset =
+      kFlatbufferDataSizeOffset + sizeof(uint32_t);
+
+  /**
+   * The expected location of the constant data size field relative to the
+   * beginning of the header.
+   */
+  static constexpr size_t kConstantDataSizeOffset =
+      kConstantDataOffsetOffset + sizeof(uint32_t);
+
+  /**
+   * Look for and parse an XNNHeader in the provided data.
+   *
+   * @param[in] data The contents of the beginning of the serialized XNNPACK
+   *     data, starting at offset 0 (i.e., the head of the data).
+   * @param[in] size Length of `data` in bytes.
+   *
+   * @returns an XNNHeader if the magic bytes were found. Returns
+   *     Error::InvalidArgument if `size` is smaller than kMinSize, or
+   *     Error::NotFound if the magic bytes do not match.
+   */
+  static Result<XNNHeader> Parse(const void* data, size_t size);
+
+  /**
+   * The offset in bytes to the beginning of the flatbuffer data.
+   */
+  uint32_t flatbuffer_offset;
+  /**
+   * The size in bytes of the flatbuffer data.
+   */
+  uint32_t flatbuffer_size;
+
+  /**
+   * The offset in bytes to the beginning of the constant data.
+   */
+  uint32_t constant_data_offset;
+  /**
+   * The size in bytes of the constant data.
+   */
+  uint64_t constant_data_size;
+};
+
+} // namespace delegate
+} // namespace xnnpack
+} // namespace executor
+} // namespace torch