Skip to content

Commit 78197a3

Browse files
robellfacebook-github-bot
authored andcommitted
Initial framework of an ethos-u runtime backend #3 (#659)
Summary: Cleaned up ethos-u runtime flow, replaces #595 Added shell of runtime Arm Backend for Ethos-U ./backends/arm/cmake/build.sh will pull the appropriate compiler and build the ethos-u driver and delegate and produce the libraries needed to link an application in, which can run a delegated .pte with the revised vela_bin_stream form contained in these patches. Currently supports .pte files stored in SRAM. Pull Request resolved: #659 Reviewed By: cccclai Differential Revision: D50035452 Pulled By: digantdesai fbshipit-source-id: 7b8a23836c92dfa9235decf4c631654fa8fc396a
1 parent 0aff4da commit 78197a3

File tree

17 files changed

+1358
-67
lines changed

17 files changed

+1358
-67
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,6 @@
4343
[submodule "examples/third-party/fbjni"]
4444
path = examples/third-party/fbjni
4545
url = https://github.com/facebookincubator/fbjni.git
46+
[submodule "backends/arm/third-party/ethos-u-core-driver"]
47+
path = backends/arm/third-party/ethos-u-core-driver
48+
url = https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,13 @@ if(EXECUTORCH_BUILD_QNN)
336336
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/qualcomm)
337337
endif()
338338

339+
# Build Arm Baremetal backend
340+
option(EXECUTORCH_BUILD_ARM_BAREMETAL
341+
"Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF)
342+
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
343+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
344+
endif()
345+
339346
# Add selective build subdirectory
340347
if(BUILD_SELECTIVE_BUILD_TEST)
341348
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/selective_build)

backends/arm/CMakeLists.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2023 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
cmake_minimum_required(VERSION 3.19)

# Have the generator write compile_commands.json.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Root of the executorch checkout. A value supplied by the including project
# wins; otherwise it is two levels above this directory.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

# Directory that contains the executorch checkout itself.
# NOTE(review): presumably so sources can include headers with an
# "executorch/..." prefix -- confirm against ArmBackendEthosU.cpp.
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# Provides DRIVER_ETHOSU_INCLUDE_DIR (Ethos-U core driver headers).
include(cmake/Dependencies.cmake)

# Sources of the Ethos-U delegate, as absolute paths under EXECUTORCH_ROOT.
set(_arm_baremetal_sources
    "${EXECUTORCH_ROOT}/backends/arm/runtime/ArmBackendEthosU.cpp"
)

# Static library holding the Ethos-U runtime delegate. Both include roots are
# PUBLIC, so they propagate to anything that links against the library.
add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
target_include_directories(
  executorch_delegate_ethos_u
  PUBLIC ${_common_include_directories}
         ${DRIVER_ETHOSU_INCLUDE_DIR}
)

backends/arm/arm_backend.py

Lines changed: 100 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import logging
1313
import operator
1414
import os
15+
import struct
16+
import subprocess
1517
import tempfile
1618
from typing import final, List
1719

@@ -136,13 +138,89 @@ def dbg_tosa_dump(tosa_fb, path):
136138
fb = tosa_fb.serialize()
137139
js = tosa_fb.writeJson(filename)
138140

139-
f = open(path + filename, "wb")
140-
f.write(fb)
141-
f.close()
141+
with open(path + filename, "wb") as f:
142+
f.write(fb)
142143

143-
f = open(path + "desc.json", "w")
144-
f.write(js)
145-
f.close()
144+
with open(path + "desc.json", "w") as f:
145+
f.write(js)
146+
147+
148+
# Output to Vela with current file-based compilation
# WARNING: if this changes, the runtime reader also needs to change
def vela_compile(tosa_fb):
    """Compile a TOSA graph with Vela and repack the result as a vela_bin_stream.

    The serialized TOSA flatbuffer is written to a temporary directory, the
    ``vela`` command line tool is run on it (accelerator config
    ``ethos-u55-128``), and the ``output/out_sg0_vela.npz`` file it produces is
    converted into a flat byte stream laid out as:

    * 16-byte header: ``b"vela_bin_stream\\x00"``
    * one block per NPZ entry, in the order ``data.keys()`` yields them:
        - 16-byte name: the key truncated to 15 bytes and NUL padded
        - 16-byte little-endian length of the *unpadded* payload (equivalent
          to an int32 followed by 12 zero bytes for any length below 2**32)
        - the payload, zero padded up to a multiple of 16 bytes
    * one ``scratch_data`` block of zeros whose size is ``scratch_shape[0]``
      rounded up to a multiple of 16; its length field holds the rounded size
    * 16-byte footer: ``b"vela_end_stream\\x00"``

    Payload encoding per key:

    * ``input_shape`` / ``output_shape``: int32 count, then four little-endian
      int32 per shape (shapes shorter than 4 are right-padded with 0).
    * ``input_offset`` / ``output_offset``: int32 count, then one int32 each.
    * anything else: the array's raw bytes (``ndarray.tobytes()``).

    Args:
        tosa_fb: object whose ``serialize()`` returns the TOSA flatbuffer as a
            bytes-like value.

    Returns:
        bytes: header + blocks + footer as described above.

    Raises:
        subprocess.CalledProcessError: ``vela`` exited with a non-zero status.
        RuntimeError: the NPZ reports more than one output offset.
        AssertionError: a shape has more than 4 dimensions.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tosaname = "out.tosa"
        with open(os.path.join(tmpdir, tosaname), "wb") as f:
            f.write(tosa_fb.serialize())

        # Invoke vela with an argument vector and cwd= instead of a
        # shell-interpreted "cd <dir>; vela ..." string, so a temp path with
        # spaces or shell metacharacters cannot break or alter the command.
        # The NPZ is read from "output/" under this working directory.
        subprocess.run(
            ["vela", "--accelerator-config", "ethos-u55-128", tosaname],
            cwd=tmpdir,
            check=True,
        )

        np_path = os.path.join(tmpdir, "output", "out_sg0_vela.npz")
        # Pieces of the stream; joined once at the end rather than growing an
        # immutable bytes object block by block.
        chunks = []
        with np.load(np_path, allow_pickle=False) as data:
            for key in data.keys():
                block_name = bytes(key, "utf8")[:15]
                block_name = block_name + b"\x00" * (16 - len(block_name))

                if key in ("input_shape", "output_shape"):
                    shapes = data[key]
                    # Encode a struct of int len; and one or more int x,y,z,w shape;
                    block_data = struct.pack("<i", len(shapes))
                    for shape in shapes:
                        assert len(shape) <= 4
                        padded_shape = shape.tolist() + [0] * (4 - len(shape))
                        block_data += struct.pack("<iiii", *padded_shape)
                elif key in ("input_offset", "output_offset"):
                    offsets = data[key]
                    if key == "output_offset" and len(offsets) > 1:
                        raise RuntimeError(
                            "Currently only support one output in Vela ArmBackend"
                        )
                    block_data = struct.pack("<i", len(offsets))
                    for offset in offsets:
                        block_data += struct.pack("<i", offset)
                else:
                    block_data = data[key].tobytes()

                # We need the actual unpadded block lengths for hw setup
                block_length = len(block_data).to_bytes(16, "little")
                # pad block data to multiple of 16 bytes (no padding when the
                # length is already a multiple of 16, including 0)
                padding = b"\x00" * (15 - (len(block_data) - 1) % 16)

                chunks.extend((block_name, block_length, block_data, padding))

            # Add a block for scratch, inputs and outputs
            # scratch shape is a 1 element array giving us size in bytes
            scratch_name = bytes("scratch_data", "utf8")[:15]
            scratch_name = scratch_name + b"\x00" * (16 - len(scratch_name))
            scratch_size = data["scratch_shape"][0].item()
            # Unlike the blocks above, the recorded length is the size after
            # rounding up to a multiple of 16.
            scratch_size = scratch_size + (15 - (scratch_size - 1) % 16)
            chunks.extend(
                (
                    scratch_name,
                    scratch_size.to_bytes(16, "little"),
                    b"\x00" * scratch_size,
                )
            )
            # TODO are these already in scratch shape? look to be
            # input_shape * input_elem_size
            # output_shape * output_elem_size
            # input_offset and output_offset specify the location these arrays are written from base of scratch

        # return 16 byte VELA bin header + blocks + footer
        header = bytes("vela_bin_stream", "utf-8") + b"\x00"
        footer = bytes("vela_end_stream", "utf-8") + b"\x00"
        return header + b"".join(chunks) + footer
146224

147225

148226
def dbg_fail(node, tosa_fb, path):
@@ -237,14 +315,13 @@ def preprocess( # noqa: C901
237315
# if a debug/test build capture output files from TOSA stage
238316
path = None
239317
debug_output = False
318+
output_format = "vela"
240319
for spec in compile_spec:
241320
if spec.key == "debug_tosa_path":
242321
path = spec.value.decode()
243322
debug_output = True
244-
245-
# in non debug builds we still pass files to vela
246-
if path is None:
247-
path = tempfile.mkdtemp(prefix="arm_tosa_")
323+
if spec.key == "output_format":
324+
output_format = spec.value.decode()
248325

249326
# Converted output for this subgraph, serializer needs path early as it emits
250327
# const data directly. Path created and data written only in debug builds.
@@ -890,6 +967,16 @@ def preprocess( # noqa: C901
890967
if debug_output is True:
891968
dbg_tosa_dump(tosa_fb, path)
892969

893-
# Serialize and return the tosa flatbuffer
894-
fb = tosa_fb.serialize()
895-
return PreprocessResult(processed_bytes=bytes(fb))
970+
# Serialize and return the program. While we have always produced TOSA
971+
# output as an intermediate, some flows compile to device binaries in
972+
# preprocess and some consume TOSA fb directly.
973+
if output_format == "vela":
974+
# Emit vela_bin_stream format
975+
binary = vela_compile(tosa_fb)
976+
elif output_format == "tosa":
977+
# Emit TOSA flatbuffer
978+
binary = bytes(tosa_fb.serialize())
979+
else:
980+
raise RuntimeError(f"Unknown format {output_format}")
981+
982+
return PreprocessResult(processed_bytes=binary)

backends/arm/cmake/Dependencies.cmake

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright 2023 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# Root of the Arm backend's third-party checkouts (backends/arm/third-party).
#
# Anchored on CMAKE_CURRENT_LIST_DIR (the directory of this file) rather than
# CMAKE_CURRENT_SOURCE_DIR: inside an include()d file the latter is the
# directory of whichever CMakeLists.txt did the include, so the path would be
# wrong if this file were ever included from anywhere but backends/arm.
get_filename_component(
  THIRD_PARTY_ROOT "${CMAKE_CURRENT_LIST_DIR}/../third-party" ABSOLUTE
)

# Ethos-U driver
set(DRIVER_ETHOSU_INCLUDE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include")

# Directory-scoped include path, kept so targets that do not reference
# DRIVER_ETHOSU_INCLUDE_DIR explicitly continue to see the driver headers.
# NOTE(review): targets that add DRIVER_ETHOSU_INCLUDE_DIR through
# target_include_directories() do not need this; consider dropping it once all
# consumers do.
include_directories("${DRIVER_ETHOSU_INCLUDE_DIR}")

backends/arm/cmake/build.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Clean-builds the Arm baremetal (Cortex-M + Ethos-U) libraries with CMake.
# If the arm-none-eabi GCC 12.3.rel1 toolchain is not already unpacked next to
# this script, toolchain.sh is run first to fetch it.
#
# Build directories are created relative to the current working directory and
# that directory is used as the CMake source directory, so the script is
# expected to be launched from the repository root.
set -e

#
# Setup toolchain
#
BASEDIR=$(realpath "$(dirname "$0")")
echo "building using build.sh in $BASEDIR"

# Machine hardware name (e.g. x86_64, aarch64). `uname -m` is used because
# `uname -i` is non-portable and prints "unknown" on many Linux systems, which
# would produce a toolchain directory name that never exists.
ARCH=$(uname -m)
GCCPATH=${BASEDIR}/arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi/bin/

echo "$GCCPATH"
if test -d "${GCCPATH}"; then
    echo "Using existing compiler ${GCCPATH}"
else
    # toolchain.sh downloads and unpacks into its current directory.
    pushd "${BASEDIR}/"
    ./toolchain.sh
    popd
fi
export PATH=${PATH}:${GCCPATH}

echo "building with $(arm-none-eabi-gcc -v 2>&1 | grep "^gcc")"


#
# Prepare and run clean build
#
rm -rf buck-out/ build/lib/ cmake-out/
rm -rf cmake-corstone
mkdir cmake-corstone
cd cmake-corstone

# Configure for a Cortex-M55 + Ethos-U55-128 target with the bare-metal GCC
# toolchain file; host targets and XNNPACK are switched off.
cmake -DFLATC_EXECUTABLE=flatc \
      -DEXECUTORCH_BUILD_XNNPACK=OFF \
      -DEXECUTORCH_BUILD_HOST_TARGETS=OFF \
      -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
      -DCMAKE_SYSTEM_PROCESSOR=cortex-m55+nodsp+nofp \
      -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \
      --toolchain backends/arm/cmake/arm-none-eabi-gcc.cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DEXECUTORCH_ENABLE_LOGGING_RELEASE_MODE=ON \
      ..

cd ..
# NOTE(review): the ethos_u and ethosu_core_driver targets are presumably
# defined by CMake files outside backends/arm/CMakeLists.txt -- verify the
# names still match if this build fails with "unknown target".
cmake --build cmake-corstone -j9 --target ethos_u ethosu_core_driver executorch portable_ops_lib portable_kernels

backends/arm/cmake/toolchain.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Downloads the Arm GNU bare-metal toolchain 12.3.rel1 for the host
# architecture and unpacks it into the current working directory.
set -e

# Cross compiler for Arm baremetal (e.g. Corstone-300 FVP or silicon)
# `uname -m` is used because `uname -i` is non-portable and prints "unknown"
# on many Linux systems.
ARCH=$(uname -m)
TOOLCHAIN=arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi

# --fail: stop on an HTTP error instead of saving the error page and handing it
# to tar; --location: follow redirects.
curl --fail --location -o gcc.tar.xz \
    "https://armkeil.blob.core.windows.net/developer/Files/downloads/gnu/12.3.rel1/binrel/${TOOLCHAIN}.tar.xz"
tar xf gcc.tar.xz

# Use the same architecture-specific directory name that was downloaded (the
# bin directory used to be hard-coded to the aarch64 build).
# This PATH change reaches the caller only when the script is sourced; a plain
# ./toolchain.sh run cannot modify its parent's environment.
TOOLCHAIN_BIN=$(cd "${TOOLCHAIN}/bin/" && pwd)
export PATH="${PATH}:${TOOLCHAIN_BIN}"

0 commit comments

Comments
 (0)