Skip to content

Handle XNNHeader in XNNPACK Runtime #1543

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 54 additions & 8 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
#include <executorch/backends/xnnpack/runtime/XNNHeader.h>
#include <executorch/backends/xnnpack/schema_generated.h>
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
Expand Down Expand Up @@ -103,6 +104,34 @@ std::vector<T> flatbufferDimsToVector(
return dims_data;
}

/**
 * Resolves the pointer to the constant data backing a serialized tensor value.
 *
 * Two storage layouts are supported:
 *  - `constant_data_ptr` is non-null: the tensor's entry in the graph's
 *    `constant_data` table supplies an offset that is added to
 *    `constant_data_ptr`.
 *  - `constant_data_ptr` is null (deprecated layout): the bytes are read from
 *    the graph's `constant_buffer` table inside the flatbuffer payload.
 *
 * @param tensor_value Serialized tensor whose constant buffer index is read.
 * @param flatbuffer_graph Graph that owns the constant tables.
 * @param constant_data_ptr Base of the out-of-flatbuffer constant data, or
 *     nullptr to use the in-flatbuffer constant buffers.
 * @returns Pointer to the tensor's constant bytes, or nullptr when the tensor
 *     has constant buffer index 0 (i.e. no constant data attached).
 */
const uint8_t* getConstantDataPtr(
    const fb_xnnpack::XNNTensorValue* tensor_value,
    GraphPtr flatbuffer_graph,
    const uint8_t* constant_data_ptr) {
  const auto idx = tensor_value->constant_buffer_idx();
  if (idx == 0) {
    // Nothing to look up: this tensor carries no constant data.
    return nullptr;
  }

  if (constant_data_ptr != nullptr) {
    // Constants live outside the flatbuffer; translate the index to an offset.
    const auto& offsets = *flatbuffer_graph->constant_data();
    const uint64_t offset = offsets[idx]->offset();
    return constant_data_ptr + offset;
  }

  // TODO(T172265611): Remove constant_buffer in flatbuffer path after BC
  // window
  const auto& buffers = *flatbuffer_graph->constant_buffer();
  return buffers[idx]->storage()->data();
}

/**
Define serialized tensor value into
the subgraph. While also keeping track of the remapped ids from
Expand All @@ -113,6 +142,7 @@ Error defineTensor(
std::unordered_map<uint32_t, uint32_t>& remapped_ids,
ValuePtr value,
GraphPtr flatbuffer_graph,
const uint8_t* constant_data_ptr,
XNNExecutor* executor,
MemoryAllocator* runtime_allocator) {
const fb_xnnpack::XNNTensorValue* tensor_value = nullptr;
Expand Down Expand Up @@ -151,11 +181,9 @@ Error defineTensor(

// Get Pointer to constant data from flatbuffer, if its non-constant
// it is a nullptr
const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
auto buffer_idx = tensor_value->constant_buffer_idx();
const auto buffer_ptr = buffer_idx == 0
? nullptr
: constant_buffer[buffer_idx]->storage()->data();
const uint8_t* buffer_ptr =
getConstantDataPtr(tensor_value, flatbuffer_graph, constant_data_ptr);

xnn_status status;
// The type we might have to convert to
auto dq_datatype = getDataType(tensor_value->dq_datatype());
Expand Down Expand Up @@ -1429,14 +1457,31 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
size_t num_bytes,
XNNExecutor* executor,
MemoryAllocator* runtime_allocator) {
Result<XNNHeader> header = XNNHeader::Parse(buffer_pointer, num_bytes);
const uint8_t* flatbuffer_data = nullptr;
const uint8_t* constant_data = nullptr;

// Header status can only either be Error::Ok or Error::NotFound
if (header.ok()) {
flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
header->flatbuffer_offset;
constant_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
header->constant_data_offset;
} else if (header.error() == Error::NotFound) {
flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer);
} else {
ET_LOG(Error, "XNNHeader may be corrupt");
return header.error();
}

ET_CHECK_OR_RETURN_ERROR(
fb_xnnpack::XNNGraphBufferHasIdentifier(buffer_pointer),
fb_xnnpack::XNNGraphBufferHasIdentifier(flatbuffer_data),
DelegateInvalidCompatibility,
"XNNPACK Delegate Serialization Format version identifier '%.4s' != expected '%.4s'",
flatbuffers::GetBufferIdentifier(buffer_pointer),
flatbuffers::GetBufferIdentifier(flatbuffer_data),
fb_xnnpack::XNNGraphIdentifier());

auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(buffer_pointer);
auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(flatbuffer_data);
// initialize xnnpack
xnn_status status = xnn_initialize(/*allocator =*/nullptr);
ET_CHECK_OR_RETURN_ERROR(
Expand Down Expand Up @@ -1476,6 +1521,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
remapped_ids,
value,
flatbuffer_graph,
constant_data,
executor,
runtime_allocator);

Expand Down
77 changes: 77 additions & 0 deletions backends/xnnpack/runtime/XNNHeader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/xnnpack/runtime/XNNHeader.h>

#include <cstring>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/result.h>

#pragma clang diagnostic ignored "-Wdeprecated"

namespace torch {
namespace executor {
namespace xnnpack {
namespace delegate {

namespace {
/// Decodes the 8 bytes starting at `data` as an unsigned 64-bit integer stored
/// least-significant byte first.
uint64_t GetUInt64LE(const uint8_t* data) {
  uint64_t value = 0;
  // Walk from the most significant byte (highest address) down to byte 0,
  // shifting previously accumulated bytes up as we go.
  for (int i = 7; i >= 0; --i) {
    value = (value << 8) | static_cast<uint64_t>(data[i]);
  }
  return value;
}

/// Decodes the 4 bytes starting at `data` as an unsigned 32-bit integer stored
/// least-significant byte first.
uint32_t GetUInt32LE(const uint8_t* data) {
  uint32_t value = 0;
  // Walk from the most significant byte (highest address) down to byte 0,
  // shifting previously accumulated bytes up as we go.
  for (int i = 3; i >= 0; --i) {
    value = (value << 8) | static_cast<uint32_t>(data[i]);
  }
  return value;
}

} // namespace

/**
 * Looks for an XNNHeader at the start of `data` and decodes its fields.
 *
 * @param[in] data Start of the serialized XNNPACK data.
 * @param[in] size Number of bytes available at `data`.
 *
 * @returns Error::InvalidArgument when `size` is smaller than kMinSize,
 *     Error::NotFound when the bytes at kMagicOffset do not equal kMagic, and
 *     otherwise an XNNHeader populated from the little-endian fields. The
 *     decoded offsets and sizes are not validated against `size` here.
 */
Result<XNNHeader> XNNHeader::Parse(const void* data, size_t size) {
  if (size < kMinSize) {
    return Error::InvalidArgument;
  }

  const auto* bytes = static_cast<const uint8_t*>(data);

  const bool has_magic =
      std::memcmp(bytes + kMagicOffset, kMagic, kMagicSize) == 0;
  if (!has_magic) {
    return Error::NotFound;
  }

  // Fields are listed in declaration order of the XNNHeader data members.
  return XNNHeader{
      /*flatbuffer_offset=*/GetUInt32LE(bytes + kFlatbufferDataOffsetOffset),
      /*flatbuffer_size=*/GetUInt32LE(bytes + kFlatbufferDataSizeOffset),
      /*constant_data_offset=*/GetUInt32LE(bytes + kConstantDataOffsetOffset),
      /*constant_data_size=*/GetUInt64LE(bytes + kConstantDataSizeOffset)};
}

// Out-of-class definition that provides storage for the static constexpr
// member XNNHeader::kMagic, which Parse() passes to std::memcmp.
// NOTE(review): from C++17 on, static constexpr data members are implicitly
// inline and this redeclaration is deprecated -- presumably the reason
// -Wdeprecated is silenced near the top of this file; confirm.
constexpr char XNNHeader::kMagic[kMagicSize];

} // namespace delegate
} // namespace xnnpack
} // namespace executor
} // namespace torch
125 changes: 125 additions & 0 deletions backends/xnnpack/runtime/XNNHeader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/runtime/core/result.h>

namespace torch {
namespace executor {
namespace xnnpack {
namespace delegate {

/**
 * An extended XNNPACK header that is embedded ahead of the flatbuffer payload.
 *
 * Byte layout, as described by the offset constants below:
 *   [0, 4)   not interpreted by the header parser
 *   [4, 8)   magic bytes (kMagic)
 *   [8, 10)  header length
 *   [10, 14) flatbuffer data offset
 *   [14, 18) flatbuffer data size
 *   [18, 22) constant data offset
 *   [22, 30) constant data size
 */
struct XNNHeader {
  /**
   * Minimum size of the XNNHeader. Callers must supply at least this many
   * bytes from the head of the serialized XNNPACK data.
   */
  static constexpr size_t kMinSize = 30;

  /**
   * Offset of the magic bytes. It matches the offset used by the flatbuffer
   * header, which makes it possible to tell whether the data begins with a
   * plain flatbuffer or with the wrapper header introduced here.
   */
  static constexpr size_t kMagicOffset = 4;

  /**
   * The magic bytes that identify the header.
   *
   * This is the canonical definition of the expected value. If the header
   * layout ever changes in a compatibility-breaking way, increment the digits
   * in the magic; note that doing so prevents older binaries from recognizing
   * the header. The compatibility-preserving way to evolve the layout is to
   * increase the header's length field and append new fields at the end.
   */
  static constexpr size_t kMagicSize = 4;
  static constexpr char kMagic[kMagicSize] = {'X', 'H', '0', '0'};

  /**
   * Size in bytes of the header length field (stored in 2 bytes).
   */
  static constexpr size_t kHeaderLengthSize = 2;

  /**
   * Location of the header length field, relative to the beginning of the
   * header.
   */
  static constexpr size_t kHeaderLengthOffset = kMagicOffset + kMagicSize;

  /**
   * Location of the flatbuffer data offset field, relative to the beginning
   * of the header.
   */
  static constexpr size_t kFlatbufferDataOffsetOffset =
      kHeaderLengthOffset + kHeaderLengthSize;

  /**
   * Location of the flatbuffer data size field, relative to the beginning of
   * the header.
   */
  static constexpr size_t kFlatbufferDataSizeOffset =
      kFlatbufferDataOffsetOffset + sizeof(uint32_t);

  /**
   * Location of the constant data offset field, relative to the beginning of
   * the header.
   */
  static constexpr size_t kConstantDataOffsetOffset =
      kFlatbufferDataSizeOffset + sizeof(uint32_t);

  /**
   * Location of the constant data size field, relative to the beginning of
   * the header.
   */
  static constexpr size_t kConstantDataSizeOffset =
      kConstantDataOffsetOffset + sizeof(uint32_t);

  /**
   * Look for and parse an XNNHeader in the provided data.
   *
   * @param[in] data The beginning of the serialized XNNPACK data, starting at
   *     offset 0.
   * @param[in] size Length of `data` in bytes.
   *
   * @returns an XNNHeader if the header was found. Returns an error if `size`
   *     is smaller than kMinSize or if the magic bytes were not found.
   */
  static Result<XNNHeader> Parse(const void* data, size_t size);

  /** Offset in bytes to the beginning of the flatbuffer data. */
  uint32_t flatbuffer_offset;

  /** Size in bytes of the flatbuffer data. */
  uint32_t flatbuffer_size;

  /** Offset in bytes to the beginning of the constant data. */
  uint32_t constant_data_offset;

  /** Size in bytes of the constant data. */
  uint64_t constant_data_size;
};

} // namespace delegate
} // namespace xnnpack
} // namespace executor
} // namespace torch
18 changes: 17 additions & 1 deletion backends/xnnpack/serialization/schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,17 @@ table XNNLeakyReLU {
flags: uint;
}

// Describes where one piece of constant data lives when constants are stored
// outside of the flatbuffer payload.
table ConstantDataOffset {
// Offset in bytes of this constant's data, relative to the constant data base
// offset recorded in the XNNPACK header (XNNHeader::constant_data_offset in
// the runtime).
offset: uint64;

// The size in bytes of valid data starting at the offset. The constant data
// may be followed by padding before the next piece of constant data.
size: uint64;
}

table XNNGraph {
// Schema version.
version:string;
Expand All @@ -299,11 +310,16 @@ table XNNGraph {
// Tables of constant data, used for constant Values (e.g.
// data field of weight tensors). Each constant is assigned an index into the table
// which are each individually aligned. 0 index is reserved to be pointed to by non-constant
// Tensors
// Tensors. Exactly one of constant_buffer and constant_data must be non-empty
constant_buffer:[Buffer];

// the list index is memory buffer id, the value is the memory buffer size.
mem_buffer_sizes: [uint];

// List of the constant data that follows the XNNGraph in this file. Each constant data is assigned an index into
// the table. 0 index is reserved to be pointed to by non-constant Tensor. Exactly one of constant_buffer and
// constant_data must be non-empty
constant_data:[ConstantDataOffset];
}

root_type XNNGraph;
8 changes: 8 additions & 0 deletions backends/xnnpack/serialization/xnnpack_graph_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,12 @@ class Buffer:
storage: bytes


@dataclass
class ConstantDataOffset:
    """Python mirror of the `ConstantDataOffset` table in schema.fbs."""

    # Offset in bytes of the constant's data; schema.fbs defines it as relative
    # to the constant data base offset given in the XNNPACK header.
    offset: int
    # Size in bytes of the valid data starting at `offset`.
    size: int


@dataclass
class XNNGraph:
version: str
Expand All @@ -429,3 +435,5 @@ class XNNGraph:

constant_buffer: List[Buffer]
mem_buffer_sizes: List[int]

constant_data: List[ConstantDataOffset]
Loading