Cross compiling for Xtensa #579

Closed · wants to merge 1 commit
CMakeLists.txt: 6 additions, 0 deletions
@@ -134,6 +134,8 @@
option(EXECUTORCH_BUILD_SDK
"Build the ExecuTorch SDK library and the SDK example runner.")

option(EXECUTORCH_BUILD_EXAMPLES "Build the ExecuTorch examples.")
option(EXECUTORCH_BUILD_XTENSA_EXAMPLE
"Build the example targeted for the Xtensa Hifi4 DSP" OFF)

if(NOT BUCK2)
set(BUCK2 buck2)
@@ -348,5 +350,9 @@
if(EXECUTORCH_BUILD_EXAMPLES)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples)
endif()

if(EXECUTORCH_BUILD_XTENSA_EXAMPLE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/xtensa)
endif()

# Print all summary
executorch_print_configuration_summary()
examples/xtensa/CMakeLists.txt: 108 additions, 0 deletions
@@ -0,0 +1,108 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Set the minimum required version of CMake for this project.
cmake_minimum_required(VERSION 3.10)

if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()

# Set the project name.
project(xtensa_executorch_example)

# Source root directory for executorch.
if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

add_compile_options(
-DSDK_DEBUGCONSOLE=1
-DSERIAL_PORT_TYPE_UART=1
-DDEBUG_CONSOLE_RX_ENABLE=0
-DDEBUG
-DCPU_MIMXRT685SFVKB_dsp
-DMCUXPRESSO_SDK
-g
-O0
-Wall
-fsigned-char
-Wno-missing-braces
-fmessage-length=0
-DPRINTF_FLOAT_ENABLE=1)

if(NOT DEFINED NXP_SDK_ROOT_DIR)
message(FATAL_ERROR "NXP_SDK_ROOT_DIR is not set")
endif()

# lint_cmake: -linelength
set(SOURCES
${NXP_SDK_ROOT_DIR}/components/lists/fsl_component_generic_list.c
${NXP_SDK_ROOT_DIR}/components/uart/fsl_adapter_usart.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_clock.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_common.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_common_dsp.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_flexcomm.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_gpio.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_mu.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_reset.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers/fsl_usart.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/system_MIMXRT685S_dsp.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/utilities/debug_console_lite/fsl_assert.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/utilities/debug_console_lite/fsl_debug_console.c
${NXP_SDK_ROOT_DIR}/boards/evkmimxrt685/dsp_examples/mu_polling/dsp/board_hifi4.c
${NXP_SDK_ROOT_DIR}/boards/evkmimxrt685/dsp_examples/mu_polling/dsp/pin_mux.c
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/utilities/str/fsl_str.c)

add_library(dsp_mu_polling_libs STATIC ${SOURCES})

target_include_directories(
dsp_mu_polling_libs
PUBLIC ${NXP_SDK_ROOT_DIR}
${NXP_SDK_ROOT_DIR}/components/uart
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/drivers
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/utilities/debug_console_lite
${NXP_SDK_ROOT_DIR}/components/lists
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S
${NXP_SDK_ROOT_DIR}/CMSIS/Core/Include
${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/utilities/str
${NXP_SDK_ROOT_DIR}/boards/evkmimxrt685/dsp_examples/mu_polling/dsp)

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ops)

# Generate the model header file
add_custom_command(
OUTPUT ${CMAKE_BINARY_DIR}/model_pte.h
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/utils/gen_header.py
--model_path ${MODEL_PATH} --header_output_path ${CMAKE_BINARY_DIR}
COMMENT "Converting .pte model to header file..."
DEPENDS ${CMAKE_CURRENT_LIST_DIR}/utils/gen_header.py)

add_custom_target(gen_model_header DEPENDS ${CMAKE_BINARY_DIR}/model_pte.h)

add_executable(xtensa_executorch_example executor_runner.cpp)
add_dependencies(xtensa_executorch_example gen_model_header)

target_include_directories(xtensa_executorch_example PUBLIC ${ROOT_DIR}/..
${CMAKE_BINARY_DIR})

target_link_options(xtensa_executorch_example PRIVATE
-mlsp=${NXP_SDK_ROOT_DIR}/devices/MIMXRT685S/xtensa/min-rt)
target_link_libraries(xtensa_executorch_example dsp_mu_polling_libs
xtensa_ops_lib executorch)

add_custom_command(
TARGET xtensa_executorch_example
POST_BUILD
COMMAND
${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/utils/post_compilation.py
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME} ${CMAKE_BINARY_DIR}
COMMENT
"Generating .bin files that can be used to flash the DSP with. Copy over
the dsp_text_release.bin and dsp_data_release.bin that are generated into
your NXP MCUXpresso IDE workspace and flash the DSP with these binaries."
DEPENDS
${CMAKE_CURRENT_LIST_DIR}/utils/post_compilation.py)
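
The gen_header.py step above embeds the serialized .pte program into a C header so the DSP image can carry the model without a filesystem; executor_runner.cpp then reads it back through model_pte and sizeof(model_pte). The script itself is not part of this diff, so the following is only a hedged sketch of such a converter, assuming the model_pte array name and the --model_path / --header_output_path flags used in the add_custom_command above.

# Hedged sketch of a .pte -> C header converter in the spirit of
# utils/gen_header.py (not the actual script from this PR). It assumes the
# header must define a `model_pte` byte array, because executor_runner.cpp
# reads `model_pte` and `sizeof(model_pte)`.
import argparse
import os


def gen_header(model_path: str, header_output_path: str) -> None:
    with open(model_path, "rb") as f:
        data = f.read()

    out_file = os.path.join(header_output_path, "model_pte.h")
    with open(out_file, "w") as f:
        f.write("// Auto-generated from %s; do not edit.\n" % os.path.basename(model_path))
        f.write("#include <cstdint>\n\n")
        # The alignment attribute is a precaution for the program loader; the
        # real script may or may not add it.
        f.write("__attribute__((aligned(16))) const uint8_t model_pte[] = {\n")
        # Emit the raw program bytes as hex literals, 12 per line.
        for i in range(0, len(data), 12):
            chunk = ", ".join("0x%02x" % b for b in data[i : i + 12])
            f.write("    %s,\n" % chunk)
        f.write("};\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", required=True)
    parser.add_argument("--header_output_path", required=True)
    args = parser.parse_args()
    gen_header(args.model_path, args.header_output_path)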
examples/xtensa/executor_runner.cpp: 201 additions, 0 deletions
@@ -0,0 +1,201 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/**
* @file
*
 * This is a simple executor_runner that boots up the DSP, configures the
 * serial port, signals the M33 core that the DSP has booted, and then loads
 * the model defined in model_pte.h. It runs this model using the ops
 * available in the examples/xtensa/ops directory.
*/

#include <fsl_debug_console.h>
#include "fsl_device_registers.h"
#include "fsl_mu.h"

#include "board_hifi4.h"
#include "model_pte.h"
#include "pin_mux.h"

#include <memory>
#include <vector>

#include <executorch/extension/data_loader/buffer_data_loader.h>
#include <executorch/runtime/executor/method.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/log.h>
#include <executorch/runtime/platform/profiler.h>
#include <executorch/runtime/platform/runtime.h>
#include <executorch/util/util.h>

static uint8_t method_allocator_pool[18 * 1024U]; // 18 kB for the method allocator

using namespace torch::executor;
#include <xtensa/config/core.h>

#define APP_MU MUB
/* Flag indicating that the DSP core has booted up */
#define BOOT_FLAG 0x01U
/* Channel transmit and receive register */
#define CHN_MU_REG_NUM 0U
/* How many messages are used to test message sending */
#define MSG_LENGTH 32U

using torch::executor::Error;
using torch::executor::Result;

void LED_INIT();
void LED_TOGGLE();

void LED_INIT() {
CLOCK_EnableClock(kCLOCK_HsGpio0);
RESET_PeripheralReset(kHSGPIO0_RST_SHIFT_RSTn);
gpio_pin_config_t pin_config = {kGPIO_DigitalOutput, LOGIC_LED_OFF};
GPIO_PinInit(
BOARD_LED_RED_GPIO,
BOARD_LED_RED_GPIO_PORT,
BOARD_LED_RED_GPIO_PIN,
&pin_config);
}

void LED_TOGGLE() {
LED_RED_TOGGLE();
}

/*!
* @brief Function to create a delay for the LED blink.
*/
void delay(void) {
volatile uint32_t i = 0;
for (i = 0; i < 5000000; ++i) {
__NOP();
}
}

void et_pal_emit_log_message(
et_timestamp_t timestamp,
et_pal_log_level_t level,
const char* filename,
__ET_UNUSED const char* function,
size_t line,
const char* message,
__ET_UNUSED size_t length) {
PRINTF("\r%s\n", message);
}

int main(int argc, char** argv) {
/* Init board hardware. */
BOARD_InitBootPins();

/* Initialize LED */
LED_INIT();

/* MUB init */
MU_Init(APP_MU);

/* Send flag to Core 0 to indicate Core 1 has startup */
MU_SetFlags(APP_MU, BOOT_FLAG);

BOARD_InitDebugConsole();
ET_LOG(Info, "Booted up in DSP.");

torch::executor::runtime_init();

auto loader =
torch::executor::util::BufferDataLoader(model_pte, sizeof(model_pte));

Result<torch::executor::Program> program =
torch::executor::Program::load(&loader);
if (!program.ok()) {
ET_LOG(
Error,
"ET: Program loading failed @ 0x%p: 0x%" PRIx32,
model_pte,
program.error());
}

ET_LOG(
Info, "AET: Model buffer loaded, has %u methods", program->num_methods());

const char* method_name = nullptr;
{
const auto method_name_result = program->get_method_name(0);
ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
method_name = *method_name_result;
}
ET_LOG(Info, "ET: Running method %s", method_name);

Result<torch::executor::MethodMeta> method_meta =
program->method_meta(method_name);
if (!method_meta.ok()) {
ET_LOG(
Error,
"ET: Failed to get method_meta for %s: 0x%x",
method_name,
(unsigned int)method_meta.error());
}

torch::executor::MemoryAllocator method_allocator{
torch::executor::MemoryAllocator(
sizeof(method_allocator_pool), method_allocator_pool)};

std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
std::vector<torch::executor::Span<uint8_t>>
planned_spans; // Passed to the allocator
size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();

for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
size_t buffer_size =
static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
ET_LOG(
Info, "ET: Setting up planned buffer %zu, size %zu.", id, buffer_size);

planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
planned_spans.push_back({planned_buffers.back().get(), buffer_size});
}

torch::executor::HierarchicalAllocator planned_memory(
{planned_spans.data(), planned_spans.size()});

torch::executor::MemoryManager memory_manager(
&method_allocator, &planned_memory);

Result<torch::executor::Method> method =
program->load_method(method_name, &memory_manager);
if (!method.ok()) {
ET_LOG(
Error,
"Loading of method %s failed with status 0x%" PRIx32,
method_name,
method.error());
}

ET_LOG(Info, "Method loaded.");
torch::executor::util::PrepareInputTensors(*method);
ET_LOG(Info, "Starting the model execution...");

Error status = method->execute();
ET_LOG(Info, "Executed model");
if (status != Error::Ok) {
ET_LOG(
Error,
"Execution of method %s failed with status 0x%" PRIx32,
method_name,
status);
} else {
ET_LOG(Info, "Model executed successfully.");
}

while (1) {
delay();
LED_TOGGLE();
}

return 0;
}
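
The runner consumes whatever .pte ends up baked into model_pte.h, so a model has to be exported ahead of time on the host and passed to CMake as MODEL_PATH. That export step is outside this diff; below is a hedged host-side sketch, assuming the exir.capture / to_edge / to_executorch flow used by ExecuTorch examples of this vintage (the AOT API has changed in later releases), and a single-add module chosen only because op_add.cpp is the one operator kernel built under ops/.

# Hedged host-side sketch: export a minimal add-only model to a .pte that can
# be passed to CMake as -DMODEL_PATH=... and converted into model_pte.h by
# gen_header.py. The exir.capture()/to_edge()/to_executorch() calls mirror the
# ExecuTorch examples contemporary with this PR and may differ in newer
# releases.
import torch
from executorch import exir


class AddModule(torch.nn.Module):
    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        # aten::add should lower to the add.out kernel registered via
        # ops/functions.yaml and implemented in ops/op_add.cpp.
        return x + y


example_inputs = (torch.ones(2, 2), torch.ones(2, 2))
program = exir.capture(AddModule(), example_inputs).to_edge().to_executorch()

with open("add.pte", "wb") as f:
    f.write(program.buffer)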
examples/xtensa/ops/CMakeLists.txt: 54 additions, 0 deletions
@@ -0,0 +1,54 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()

if(NOT PYTHON_EXECUTABLE)
set(PYTHON_EXECUTABLE python3)
endif()

# Source root directory for pytorch.
if(NOT TORCH_ROOT)
set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch)
endif()

set(_common_compile_options -Wno-deprecated-declarations)

include(${EXECUTORCH_ROOT}/build/Utils.cmake)
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
# Kernel sources. TODO(larryliu0820): use buck2 to gather the sources
set(_xtensa_kernels__srcs
"${CMAKE_CURRENT_SOURCE_DIR}/op_add.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
)

# Generate C++ bindings to register kernels into both PyTorch (for AOT) and
# ExecuTorch (for runtime). Here, all ops listed in functions.yaml are selected.
gen_selected_ops("${CMAKE_CURRENT_LIST_DIR}/functions.yaml" "" "")
# Expect gen_selected_ops output file to be selected_operators.yaml
generate_bindings_for_kernels(${CMAKE_CURRENT_SOURCE_DIR}/functions.yaml "")
message("Generated files ${gen_command_sources}")

#
# xtensa_kernels: Pure-C++ kernel library for the selected ATen ops
#
# Focused on portability and understandability rather than speed.
#
add_library(xtensa_kernels ${_xtensa_kernels__srcs})
target_link_libraries(xtensa_kernels PRIVATE executorch)
target_compile_options(xtensa_kernels PUBLIC ${_common_compile_options})

# Build a library for _xtensa_kernels__srcs
#
# xtensa_ops_lib: Register the xtensa_kernels ops into the ExecuTorch
# runtime
gen_operators_lib("xtensa_ops_lib" xtensa_kernels executorch)