Commit d7ad774

Update base for Update on "add instructions about getting mmlu score for instruct models"
Differential Revision: [D64256005](https://our.internmc.facebook.com/intern/diff/D64256005) [ghstack-poisoned]
Merge commit d7ad774 (2 parents: 03b9346 + e95aa9d)

File tree

100 files changed: +1822 additions, -621 deletions

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -135,7 +135,7 @@ jobs:
         delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: linux.2xlarge
+      runner: linux.4xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
       timeout: 60
```

.gitmodules

Lines changed: 0 additions & 3 deletions
```diff
@@ -28,9 +28,6 @@
 [submodule "backends/xnnpack/third-party/pthreadpool"]
 	path = backends/xnnpack/third-party/pthreadpool
 	url = https://github.com/Maratyszcza/pthreadpool.git
-[submodule "examples/third-party/fbjni"]
-	path = examples/third-party/fbjni
-	url = https://github.com/facebookincubator/fbjni.git
 [submodule "extension/llm/third-party/abseil-cpp"]
 	path = extension/llm/third-party/abseil-cpp
 	url = https://github.com/abseil/abseil-cpp.git
```

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
```diff
@@ -201,6 +201,10 @@ option(EXECUTORCH_BUILD_KERNELS_QUANTIZED "Build the quantized kernels" OFF)
 
 option(EXECUTORCH_BUILD_DEVTOOLS "Build the ExecuTorch Developer Tools")
 
+option(EXECUTORCH_NNLIB_OPT "Build Cadence backend Hifi nnlib kernel" OFF)
+
+option(EXECUTORCH_CADENCE_CPU_RUNNER "Build Cadence backend CPU runner" OFF)
+
 option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)
 
 option(EXECUTORCH_BUILD_XNNPACK "Build the XNNPACK backend" OFF)
```

backends/cadence/CMakeLists.txt

Lines changed: 48 additions & 0 deletions
```diff
@@ -25,6 +25,54 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 set(TARGET_DIR reference)
 
+if(EXECUTORCH_CADENCE_CPU_RUNNER)
+  include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
+
+  if(NOT PYTHON_EXECUTABLE)
+    resolve_python_executable()
+  endif()
+
+  set(_common_compile_options -Wno-deprecated-declarations -fPIC)
+
+  # Find prebuilt libraries. executorch package should contain portable_ops_lib,
+  # etdump, bundled_program.
+  find_package(executorch CONFIG REQUIRED)
+  target_link_options_shared_lib(executorch)
+  target_link_options_shared_lib(portable_ops_lib)
+
+  target_include_directories(executorch INTERFACE ${_common_include_directories})
+
+  find_package(
+    gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../third-party
+  )
+
+  add_executable(cadence_runner
+    ${EXECUTORCH_ROOT}/examples/devtools/example_runner/example_runner.cpp
+  )
+  target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
+
+  target_include_directories(
+    etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../devtools/include
+                     ${EXECUTORCH_ROOT}/third-party/flatcc/include
+  )
+
+  target_include_directories(
+    cadence_runner PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
+                          ${_common_include_directories}
+  )
+
+  target_link_libraries(
+    cadence_runner
+    executorch
+    gflags
+    etdump
+    extension_data_loader
+    bundled_program
+    cadence_ops_lib
+    flatccrt
+  )
+endif()
+
 if(EXECUTORCH_NNLIB_OPT)
   set(TARGET_DIR hifi)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
```

backends/cadence/cadence_runner/build_cadence_runner.sh renamed to backends/cadence/build_cadence_runner.sh

Lines changed: 3 additions & 10 deletions
```diff
@@ -12,7 +12,7 @@ set -euo pipefail
 SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 readonly SCRIPT_DIR
 
-readonly EXECUTORCH_ROOT="${SCRIPT_DIR}/../../.."
+readonly EXECUTORCH_ROOT="${SCRIPT_DIR}/../.."
 
 # Allow overriding the number of build jobs. Default to 9.
 export CMAKE_BUILD_PARALLEL_LEVEL="${CMAKE_BUILD_PARALLEL_LEVEL:-9}"
@@ -25,15 +25,7 @@ main() {
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_DEVTOOLS=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-    -DPYTHON_EXECUTABLE=python3 \
-    -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-    -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
-    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
-    -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
-    -DEXECUTORCH_BUILD_CPUINFO=OFF \
-    -DEXECUTORCH_ENABLE_LOGGING=ON \
-    -DEXECUTORCH_NNLIB_OPT=OFF \
-    -Bcmake-out
+    -Bcmake-out .
   cmake --build cmake-out --target install --config Release -j16
 
   local example_dir=backends/cadence
@@ -42,6 +34,7 @@ main() {
   rm -rf ${build_dir}
   cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
     -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_CADENCE_CPU_RUNNER=ON \
     -B"${build_dir}" \
     "${example_dir}"
   cmake --build "${build_dir}" --config Release -j16
```

backends/cadence/cadence_runner/CMakeLists.txt

Lines changed: 0 additions & 74 deletions
This file was deleted.

backends/cadence/reference/kernels/kernels.cpp

Lines changed: 1 addition & 0 deletions
```diff
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/backends/cadence/reference/kernels/kernels.h>
 #include <math.h>
 #include <algorithm>
 #include <cstring>
```

backends/cadence/reference/operators/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
```diff
@@ -33,7 +33,7 @@ set(_aten_ops__srcs
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
-  "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_floath.cpp"
+  "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
```

backends/cadence/reference/operators/quantized_conv_out.cpp

Lines changed: 59 additions & 28 deletions
```diff
@@ -190,34 +190,65 @@ void quantized_conv_out(
   // per-channel
   bool per_tensor_quantized = bias_scale.numel() == 1;
 
-  conv2d_nchw_core_generic<uint8_t, uint8_t, int32_t, uint8_t, true>(
-      input.const_data_ptr<uint8_t>(),
-      weight.const_data_ptr<uint8_t>(),
-      bias.const_data_ptr<int32_t>(),
-      out.mutable_data_ptr<uint8_t>(),
-      n,
-      c,
-      h,
-      w,
-      oc,
-      wc,
-      wh,
-      ww,
-      oh,
-      ow,
-      stride[0],
-      stride[1],
-      padding[0],
-      padding[1],
-      dilation[0],
-      dilation[1],
-      groups,
-      in_zero_point,
-      weight_zero_point.const_data_ptr<int32_t>(),
-      bias_scale.const_data_ptr<float>(),
-      output_scale,
-      (uint8_t)output_zero_point,
-      per_tensor_quantized);
+  if (out.scalar_type() == exec_aten::ScalarType::Byte) {
+    conv2d_nchw_core_generic<uint8_t, uint8_t, int32_t, uint8_t, true>(
+        input.const_data_ptr<uint8_t>(),
+        weight.const_data_ptr<uint8_t>(),
+        bias.const_data_ptr<int32_t>(),
+        out.mutable_data_ptr<uint8_t>(),
+        n,
+        c,
+        h,
+        w,
+        oc,
+        wc,
+        wh,
+        ww,
+        oh,
+        ow,
+        stride[0],
+        stride[1],
+        padding[0],
+        padding[1],
+        dilation[0],
+        dilation[1],
+        groups,
+        in_zero_point,
+        weight_zero_point.const_data_ptr<int32_t>(),
+        bias_scale.const_data_ptr<float>(),
+        output_scale,
+        (uint8_t)output_zero_point,
+        per_tensor_quantized);
+  } else if (out.scalar_type() == exec_aten::ScalarType::Char) {
+    conv2d_nchw_core_generic<int8_t, int8_t, int32_t, int8_t, true>(
+        input.const_data_ptr<int8_t>(),
+        weight.const_data_ptr<int8_t>(),
+        bias.const_data_ptr<int32_t>(),
+        out.mutable_data_ptr<int8_t>(),
+        n,
+        c,
+        h,
+        w,
+        oc,
+        wc,
+        wh,
+        ww,
+        oh,
+        ow,
+        stride[0],
+        stride[1],
+        padding[0],
+        padding[1],
+        dilation[0],
+        dilation[1],
+        groups,
+        in_zero_point,
+        weight_zero_point.const_data_ptr<int32_t>(),
+        bias_scale.const_data_ptr<float>(),
+        output_scale,
+        (int8_t)output_zero_point,
+        per_tensor_quantized);
+  }
 }
 
 }; // namespace native
```

backends/cadence/runtime/executor.py

Lines changed: 1 addition & 3 deletions
```diff
@@ -106,9 +106,7 @@ def __init__(
         working_dir: str = "",
     ):
         self.working_dir = working_dir
-        self.executor_builder = (
-            "./backends/cadence/cadence_runner/build_cadence_runner.sh"
-        )
+        self.executor_builder = "./backends/cadence/build_cadence_runner.sh"
         self.execute_runner = "./cmake-out/backends/cadence/cadence_runner"
         self.bundled_program_path: str = "CadenceDemoModel.bpte"
```
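
For orientation, here is a minimal Python sketch of how a harness might consume the two paths above. Only the two path strings come from this diff; the subprocess calls and the `--bundled_program_path` flag name are assumptions for illustration, not the actual `executor.py` logic.

```python
# Hypothetical sketch; only the two path strings are taken from this diff.
import subprocess

EXECUTOR_BUILDER = "./backends/cadence/build_cadence_runner.sh"
EXECUTE_RUNNER = "./cmake-out/backends/cadence/cadence_runner"


def build_and_run(bundled_program_path: str = "CadenceDemoModel.bpte") -> None:
    # Build cadence_runner once via the relocated shell script.
    subprocess.run([EXECUTOR_BUILDER], check=True)
    # Run the bundled program; the flag name is an assumption.
    subprocess.run(
        [EXECUTE_RUNNER, f"--bundled_program_path={bundled_program_path}"],
        check=True,
    )
```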

backends/qualcomm/_passes/decompose_einsum.py (new file; the path is inferred from the class name and the sibling Qualcomm passes below)

Lines changed: 65 additions & 0 deletions

```diff
@@ -0,0 +1,65 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx.experimental.proxy_tensor import make_fx
+
+
+class DecomposeEinsum(ExportPass):
+    """
+    Decompose einsum so that quantization annotation works properly.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        graph = graph_module.graph
+        for node in graph.nodes:
+            if node.target == torch.ops.aten.einsum.default:
+                decomposed_module = make_fx(
+                    node.target,
+                    tracing_mode="fake",
+                )(node.args[0], [arg.meta["val"] for arg in node.args[1]])
+
+                with graph.inserting_before(node):
+                    # remap maps original node values to new node values, which
+                    # ensures that references to nodes are correctly updated in
+                    # the new graph.
+                    remap = {}
+                    # Unlike other ops, einsum's args[0] is the equation string,
+                    # while the input nodes are stored in args[1].
+                    for i, arg in enumerate(node.args[1]):
+                        remap[f"arg1_{i+1}"] = arg
+
+                    for decomposed_node in decomposed_module.graph.nodes:
+                        # This is the args[0] equation string, which is no
+                        # longer needed after decomposition.
+                        if "arg0" in decomposed_node.name:
+                            continue
+
+                        # No need to copy the existing 'output' node; rewire
+                        # users of the einsum node to the decomposed result.
+                        if decomposed_node.op == "output":
+                            for user in node.users.copy():
+                                user.replace_input_with(
+                                    node,
+                                    remap[decomposed_node.args[0][0]],
+                                )
+                        # No need to copy existing placeholders; re-key the
+                        # remap entry from placeholder name (string) to node.
+                        elif decomposed_node.op == "placeholder":
+                            remap[decomposed_node] = remap.pop(decomposed_node.name)
+                        else:
+                            remap[decomposed_node] = graph.node_copy(
+                                decomposed_node,
+                                arg_transform=lambda x, remap=remap: remap[x],
+                            )
+
+                graph.erase_node(node)
+
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
```
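
For readers wanting to try the pass above, a minimal usage sketch follows. The toy module, shapes, and the capture API are assumptions; any pre-autograd export that keeps `aten.einsum.default` in the graph (with `meta["val"]` populated on its inputs) should exercise the same code path.

```python
import torch


class EinsumBmm(torch.nn.Module):
    def forward(self, x, y):
        # Batched matmul expressed as an einsum.
        return torch.einsum("bij,bjk->bik", x, y)


x, y = torch.randn(2, 3, 4), torch.randn(2, 4, 5)
# Capture a graph that still contains aten.einsum.default (assumed API).
gm = torch.export.export_for_training(EinsumBmm(), (x, y)).module()
result = DecomposeEinsum()(gm)
# After the pass, no einsum node should remain in the graph.
assert all(
    n.target != torch.ops.aten.einsum.default
    for n in result.graph_module.graph.nodes
)
```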

backends/qualcomm/_passes/insert_requantize.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -28,6 +28,7 @@ class InsertRequantize(ExportPass):
     # we don't use the 2nd output, 2nd output is an integer, etc.
     multi_output_op_ignore_set = {
         exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
+        exir_ops.edge.aten.topk.default,
     }
 
     def __init__(
```
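
For context on why `topk` joins this ignore set: its second output is integer indices, which carry no quantization parameters, so only the first (floating-point) output is a requantization candidate. A quick stand-alone illustration:

```python
import torch

values, indices = torch.topk(torch.tensor([0.1, 0.9, 0.4]), k=2)
print(values)         # tensor([0.9000, 0.4000]) -- float, may need requantization
print(indices)        # tensor([1, 2]) -- integer indices, nothing to requantize
print(indices.dtype)  # torch.int64
```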

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -65,6 +65,7 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.sum.dim_IntList,
+        exir_ops.edge.aten.topk.default,
         exir_ops.edge.aten._to_copy.default,
         exir_ops.edge.aten.split_with_sizes.default,
         *q_ops,
```
