Skip to content

Commit 42a0505

Browse files
committed
Pull request pytorch#10: Feature/EIEX-33 neutron backend
Merge in AITEC/executorch from feature/EIEX-33-neutron-backend to main-nxp * commit '4f455df638bd23b887c009384304bd6cd27630b5': Initial implementation of the NeutronBackend.
2 parents 18f728b + 4f455df commit 42a0505

File tree

5 files changed

+384
-0
lines changed

5 files changed

+384
-0
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL
147147
"Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF
148148
)
149149

150+
option(EXECUTORCH_BUILD_NXP_NEUTRON "Build the NXP Neutron NPU library" OFF)
151+
150152
option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
151153

152154
option(EXECUTORCH_BUILD_KERNELS_CUSTOM "Build the custom kernels" OFF)
@@ -575,6 +577,10 @@ if(EXECUTORCH_BUILD_ARM_BAREMETAL)
575577
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
576578
endif()
577579

580+
if(EXECUTORCH_BUILD_NXP_NEUTRON)
581+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/nxp)
582+
endif()
583+
578584
if(EXECUTORCH_BUILD_MPS)
579585
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/mps)
580586
endif()

backends/nxp/CMakeLists.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2024 NXP
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
cmake_minimum_required(VERSION 3.19)
project(neutron_backend)

# Write compile_commands.json next to the build files.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# When the including build has not set EXECUTORCH_ROOT, derive it from this
# directory (backends/nxp sits two levels below the source root).
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

# Include root is the parent of the source tree; presumably this is what lets
# `#include <executorch/...>` resolve.
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# Backend sources, already rooted at EXECUTORCH_ROOT.
set(_neutron_sources
    "${EXECUTORCH_ROOT}/backends/nxp/runtime/NeutronBackend.cpp"
)

# Static library holding the Neutron delegate.
add_library(executorch_delegate_neutron STATIC ${_neutron_sources})
target_include_directories(
  executorch_delegate_neutron PUBLIC ${_common_include_directories}
)
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
* Copyright 2024 NXP
3+
*
4+
* This source code is licensed under the BSD-style license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*
7+
* Implementation of the backend for the NXP Neutron NPU.
8+
*/
9+
10+
#include <executorch/runtime/backend/interface.h>
11+
#include <executorch/runtime/core/error.h>
12+
#include <executorch/runtime/core/evalue.h>
13+
14+
#include "NeutronDriver.h"
15+
#include "NeutronErrors.h"
16+
17+
using namespace std;
18+
19+
namespace torch {
20+
namespace executor {
21+
22+
// Aggregate neutron model handle and data structures into one.
23+
typedef struct {
24+
int numInputs = 0;
25+
int numOutputs = 0;
26+
NeutronModelConfig mcfg;
27+
NeutronDataConfig dcfg;
28+
NeutronModelHandle nmh = NULL;
29+
} NeutronConfig;
30+
31+
class NeutronBackend final : public PyTorchBackendInterface {
32+
public:
33+
NeutronBackend() {}
34+
35+
~NeutronBackend() = default;
36+
37+
virtual bool is_available() const override {
38+
return true;
39+
}
40+
41+
Result<DelegateHandle*> init(
42+
BackendInitContext& context,
43+
FreeableBuffer* processed,
44+
ArrayRef<CompileSpec> compile_specs) const override {
45+
46+
MemoryAllocator* allocator = context.get_runtime_allocator();
47+
48+
auto *cfg = allocator->allocateInstance<NeutronConfig>();
49+
50+
// The following data is read from the "processed" data blob.
51+
// cfg->numInputs
52+
// cfg->numoutputs
53+
// cfg->mcfg.microcode
54+
// cfg->mcfg.weights
55+
// cfg->mcfg.kernels
56+
const uint32_t* buffer = static_cast<const uint32_t*>(processed->data());
57+
uint32_t magicWord = buffer[0];
58+
// Check valid microcode.
59+
if (magicWord != 0x64434D6E) {
60+
ET_LOG(Error, "Preprocessed buffer does not contain a valid Neutron microcode");
61+
return Error::InvalidProgram;
62+
}
63+
uint32_t microcodeSize = buffer[6];
64+
uint32_t weightsSize = buffer[7];
65+
cfg->numInputs = buffer[9];
66+
cfg->numOutputs = buffer[10];
67+
cfg->mcfg.microcode = static_cast<const uint8_t*>(processed->data());
68+
cfg->mcfg.weights = static_cast<const uint8_t*>(cfg->mcfg.microcode) + microcodeSize;
69+
cfg->mcfg.kernels = static_cast<const uint8_t*>(cfg->mcfg.weights) + weightsSize;
70+
71+
// Allocate place for input and output pointers.
72+
cfg->dcfg.inputs = static_cast<const void**>(allocator->allocate(cfg->numInputs * sizeof(void*)));
73+
cfg->dcfg.outputs = static_cast<void**>(allocator->allocate(cfg->numOutputs * sizeof(void*)));
74+
75+
// Prepare data for through neutron driver.
76+
NeutronError neutronRC = neutronModelPrepare((const NeutronModelConfig *)&cfg->mcfg, &cfg->nmh);
77+
if (neutronRC != ENONE) {
78+
ET_LOG(Error, "Neutron model preparation failed with error code %d", neutronRC);
79+
return Error::InvalidProgram;
80+
}
81+
82+
return cfg;
83+
}
84+
85+
Error execute(
86+
BackendExecutionContext& context,
87+
DelegateHandle* input_handle,
88+
EValue** args) const override {
89+
90+
NeutronConfig *cfg = static_cast<NeutronConfig *>(input_handle);
91+
92+
// Set inputs and outputs from args.
93+
for (int i = 0; i < cfg->numInputs; i++) {
94+
cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr();
95+
}
96+
for (int i = 0; i < cfg->numOutputs; i++) {
97+
cfg->dcfg.outputs[i] = args[cfg->numInputs + i]->toTensor().mutable_data_ptr();
98+
}
99+
100+
// TODO: Use trace from BackendExecutionContext.
101+
NeutronTraceConfig trace_config{.traceConfig = 0};
102+
neutronSetTrace(cfg->nmh, &trace_config);
103+
104+
// Run neutron compute.
105+
NeutronError neutronRC = neutronRunBlocking(cfg->nmh, &cfg->dcfg);
106+
if (neutronRC != ENONE) {
107+
ET_LOG(Error, "Neutron model evaluation failed with error code %d", neutronRC);
108+
return Error::InvalidProgram;
109+
}
110+
111+
return Error::Ok;
112+
}
113+
114+
void destroy(DelegateHandle* handle) const override {
115+
NeutronConfig *cfg = reinterpret_cast<NeutronConfig *>(handle);
116+
117+
// Unprepare to free resources in neutron driver.
118+
NeutronError neutronRC = neutronModelUnprepare(cfg->nmh);
119+
(void)neutronRC;
120+
121+
// Deallocation is done automatically.
122+
/*
123+
delete[] cfg->dcfg.inputs;
124+
delete[] cfg->dcfg.outputs;
125+
delete cfg;
126+
*/
127+
return;
128+
}
129+
};
130+
131+
namespace {
132+
auto backend = NeutronBackend();
133+
Backend backend_id{"NeutronBackend", &backend};
134+
static auto registered = register_backend(backend_id);
135+
} // namespace
136+
137+
} // namespace executor
138+
} // namespace torch

backends/nxp/runtime/NeutronDriver.h

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
/*
2+
* Copyright 2024 NXP
3+
*
4+
* SPDX-License-Identifier: BSD-3-Clause
5+
*
6+
* Interface for the NXP Neutron NPU driver.
7+
*/
8+
9+
#ifndef NEUTRON_DRIVER_H
10+
#define NEUTRON_DRIVER_H
11+
12+
#ifdef __cplusplus
13+
extern "C" {
14+
#endif
15+
#include <stddef.h>
16+
#include <stdint.h>
17+
#include <stdbool.h>
18+
19+
#include "NeutronErrors.h"
20+
21+
/* Neutron Driver error category codes */
22+
/* Enumerators carry no explicit values, so they are numbered consecutively
 * from 0 in declaration order. */
typedef enum ERROR_CATEGORY_DRIVER {
23+
ERROR_CATEGORY_DRIVER_GENERIC, /* Generic error category */
24+
ERROR_CATEGORY_DRIVER_UNSUPPORTED, /* Unsupported function */
25+
ERROR_CATEGORY_DRIVER_UCODE, /* Microcode bad magic or version incompatible. */
26+
ERROR_CATEGORY_DRIVER_INVALID, /* Invalid arguments */
27+
ERROR_CATEGORY_DRIVER_BAD_HANDLE, /* Bad inference handle */
28+
ERROR_CATEGORY_DRIVER_NO_MEMORY, /* Not enough memory */
29+
ERROR_CATEGORY_DRIVER_INTERNAL_FAULT, /* Internal error */
30+
ERROR_CATEGORY_DRIVER_UNKNOWN_ARCH, /* Unknown architecture */
31+
ERROR_CATEGORY_DRIVER_TRACE_NOT_RUN, /* Tracing did not run, but trace buffer was requested. */
32+
ERROR_CATEGORY_DRIVER_TIMEOUT /* Timeout error. */
33+
} ERROR_CATEGORY_DRIVER;
34+
35+
/// Trace configuration to enable kernel level tracing.
36+
#define TRACE_CONFIG_KERNEL_LEVEL (1U << 0)
37+
38+
/// Trace configuration to enable job level tracing.
39+
#define TRACE_CONFIG_JOB_LEVEL (1U << 1)
40+
41+
// Macro to define where to allocate memory for NeutronCtx
42+
#ifndef NO_HEAP_USAGE
43+
#define NO_HEAP_USAGE 0
44+
#endif
45+
46+
/* Neutron Driver errors */
47+
#define GEN_NEUTRON_DRIVER_ERROR(category, code) GEN_NEUTRON_ERROR(ERROR_COMPONENT_DRIVER, category, code)
48+
#define GEN_NEUTRON_DRIVER_GENERIC_ERROR() GEN_NEUTRON_DRIVER_ERROR(ERROR_CATEGORY_DRIVER_GENERIC, __LINE__)
49+
50+
/// Type definition for a Neutron model handle. This is an identifier used to uniquely identify a model.
51+
/// The convention is that the value NEUTRON_INVALID_HANDLE handle corresponds to an invalid handle.
52+
typedef void *NeutronModelHandle;
53+
54+
/// Model description passed to neutronModelPrepare().
typedef struct {
55+
/// Neutron microcode buffer address.
56+
/// The Neutron microcode is generated by the Neutron converter tool.
57+
/// The microcode buffer is allocated and initialized by the application or ML framework.
58+
/// The microcode buffer is passed by reference to the Neutron firmware.
59+
/// The microcode buffer is specific for a given ML model.
60+
const void *microcode;
61+
62+
/// Neutron weights buffer address.
63+
/// The Neutron weights are generated by the Neutron converter tool.
64+
/// The weights buffer is allocated and initialized by the application or ML framework.
65+
/// The weights buffer address is passed by reference to the Neutron-firmware.
66+
/// The weights buffer is specific for a given ML model.
67+
const void *weights;
68+
69+
/// Neutron kernels buffer address.
70+
/// The Neutron kernels are generated by the Neutron converter tool.
71+
/// The kernels buffer is allocated and initialized by the application or ML framework.
72+
/// The kernels buffer address is passed by reference to the Neutron-firmware.
73+
/// The kernels buffer is specific for a given ML model.
74+
const void *kernels;
75+
76+
/// Timeout, in seconds, for running the microcode.
77+
/// This is the upper limit within which the user expects the run to complete; default 60.
78+
uint32_t timeoutSeconds;
79+
80+
} NeutronModelConfig;
81+
82+
/// Buffer description passed to neutronRunBlocking(), neutronRunNonBlocking() and neutronWait().
typedef struct {
83+
/// The input buffers of the model.
84+
/// The input buffers are allocated and initialized by the application or ML framework.
85+
/// The input buffers are passed by reference to the Neutron firmware.
86+
const void **inputs;
87+
88+
/// The output buffers of the model.
89+
/// The output buffers are allocated by the application or ML framework.
90+
/// The output buffers are passed by reference to the Neutron firmware.
91+
void **outputs;
92+
93+
/// Scratch buffer required for computing model intermediate results.
94+
/// If NULL, this buffer has to be allocated by the driver.
95+
void *scratch;
96+
97+
/// Scratch buffer required for prefetching model weights from FLASH to SRAM.
98+
/// This buffer is used only for Neutron-C targets when the weight prefetch option was explicitly used.
99+
/// If NULL, this buffer has to be allocated by the driver.
100+
void *scratchWeights;
101+
102+
} NeutronDataConfig;
103+
104+
/// Trace settings passed to neutronSetTrace().
typedef struct {
105+
/// Sets whether tracing should be executed during firmware run or not.
106+
/// If set to 0, tracing will not run.
107+
/// If set to 1 (TRACE_CONFIG_KERNEL_LEVEL) - kernel level tracing.
108+
/// If set to 2 (TRACE_CONFIG_JOB_LEVEL) - job level tracing.
109+
/// If set to 3 (both flags) - mixed level tracing.
110+
uint32_t traceConfig;
111+
112+
/// Buffer to store collected trace data.
113+
/// If it is NULL, the driver allocates the memory; otherwise the application supplies the buffer.
114+
char *traceBuffer;
115+
116+
/// Size of the memory allocated for traceBuffer. Needed to check whether appending a string would go out of bounds.
117+
/// The application should set this if it allocated the buffer; otherwise the driver sets the value.
118+
size_t traceBufferSize;
119+
} NeutronTraceConfig;
120+
121+
/// This structure contains the prototypes for functions that have a custom implementation.
122+
/// Any new functions or variables must be added at the end.
123+
/// It is the argument type of neutronSetConfig().
/// NOTE(review): NeutronBackend.cpp declares its own torch::executor::NeutronConfig,
/// which hides this type inside that namespace.
typedef struct {
124+
/// This function performs the copying from FLASH to SRAM.
125+
void (*copy)(void *dst, void *src, uint32_t size, uint32_t channel);
126+
/// This is a blocking function that checks if the current copy has finished.
127+
void (*wait)(uint32_t channel);
128+
} NeutronConfig;
129+
130+
/* Invalid handle, returned by neutronModelPrepare() if an error occurred. */
131+
#define NEUTRON_INVALID_HANDLE NULL
132+
133+
/// - Initialize the Neutron Driver library, setting initial values, do memory allocation
134+
/// for internal data structures, do memory mapping.
135+
NeutronError neutronInit();
136+
137+
/// - Deinitialize the Neutron Driver library, releasing any resources aquired by neutronInit
138+
NeutronError neutronDeinit();
139+
140+
/// - Prepare Neutron execution for a model with the given configuration.
141+
/// - This function only prepares the execution by transferring the parameters to the firmware.
142+
/// - This function allows caching a model and then running the same model but with different
143+
/// input data (assuming the new input data replaces the old input data by reusing the same buffers).
144+
/// - In case external allocated memory shall be used for the ModelHandle, e.g. from the Tensorflow
145+
/// tensor arena, hdl shall be a pointer to the start of the allocated memory block.
146+
// If a pointer to NULL is passed, memory will be allocated by the driver
147+
/// from HEAP. If no HEAP is available, an error will be thrown.
148+
NeutronError neutronModelPrepare(const NeutronModelConfig *mcfg, NeutronModelHandle *hdl);
149+
150+
/// - Unprepare Neutron execution handle.
151+
/// - This function releases the internal context data structures and the reserved handle.
152+
NeutronError neutronModelUnprepare(NeutronModelHandle hdl);
153+
154+
/// - Perform Neutron execution in blocking mode.
155+
NeutronError neutronRunBlocking(NeutronModelHandle hdl, const NeutronDataConfig *dcfg);
156+
157+
/// - Perform Neutron execution in non-blocking mode.
158+
/// - This functionality is only available for Neutron-S.
159+
NeutronError neutronRunNonBlocking(NeutronModelHandle hdl, const NeutronDataConfig *dcfg);
160+
161+
/// - Wait (block) for Neutron completion.
162+
/// - This functionality is only available for Neutron-S.
163+
NeutronError neutronWait(NeutronModelHandle hdl, const NeutronDataConfig *dcfg);
164+
165+
/// - Query if the job is done by Neutron.
166+
/// - This functionality is only available for neutronRunNonBlocking.
167+
NeutronError neutronIsReady(NeutronModelHandle hdl, bool *isReady);
168+
169+
#ifndef NDEBUG
170+
/// - Set tracing information.
171+
void neutronSetTrace(NeutronModelHandle hdl, NeutronTraceConfig *tcfg);
172+
173+
/// - Get tracing result to buffer.
174+
NeutronError neutronGetTrace(NeutronModelHandle hdl, char **buffer, size_t *size);
175+
#endif
176+
177+
/// - Perform power management to suspend Neutron hardware.
178+
// - This function disables the clock for Neutron.
179+
NeutronError neutronSuspend();
180+
181+
/// - Perform power management to resume Neutron hardware.
182+
// - This function enables the clock for Neutron.
183+
NeutronError neutronResume();
184+
185+
/// - Used to initialize custom API's or variables implemented by external application.
186+
NeutronError neutronSetConfig(NeutronConfig *config);
187+
188+
/// - Used to get NeutronContext size.
189+
size_t neutronGetModelContextSize();
190+
191+
/// Other functions to control the state of driver/firmware.
192+
#ifdef __cplusplus
193+
}
194+
#endif
195+
#endif // NEUTRON_DRIVER_H

0 commit comments

Comments
 (0)