Commit 0e992c6

Update on "Take advantage of C++17 in scalar_type_util.h"
I generated a big ugly table because we couldn't make promoteTypes constexpr before we had C++17. Now we have C++17. Differential Revision: [D66181946](https://our.internmc.facebook.com/intern/diff/D66181946/) [ghstack-poisoned]
2 parents: f6c586c + d4cd2b2
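For context, here is a minimal sketch of the C++17 technique the commit message alludes to, using a toy ScalarType enum and promotion rule (the names and the rule below are illustrative, not the actual contents of scalar_type_util.h): the promotion logic becomes a constexpr function, and the full lookup table is generated at compile time instead of being spelled out by hand.

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

// Toy scalar-type lattice for illustration; the real ScalarType in
// ExecuTorch has many more entries and a richer promotion rule.
enum class ScalarType : int8_t { Byte, Char, Int, Long, Float, Double, NumTypes };

constexpr std::size_t kNumTypes = static_cast<std::size_t>(ScalarType::NumTypes);

// Illustrative rule: promote to the "larger" enumerator.
constexpr ScalarType promote(ScalarType a, ScalarType b) {
  return static_cast<int8_t>(a) >= static_cast<int8_t>(b) ? a : b;
}

// Build the 2-D promotion table at compile time. Loops in constexpr
// functions are C++14, but writing through std::array's non-const
// operator[] inside a constant expression requires C++17.
constexpr std::array<std::array<ScalarType, kNumTypes>, kNumTypes>
makePromoteTable() {
  std::array<std::array<ScalarType, kNumTypes>, kNumTypes> table{};
  for (std::size_t i = 0; i < kNumTypes; ++i) {
    for (std::size_t j = 0; j < kNumTypes; ++j) {
      table[i][j] =
          promote(static_cast<ScalarType>(i), static_cast<ScalarType>(j));
    }
  }
  return table;
}

// C++17 inline variable: a single compile-time table shared across
// translation units, replacing the hand-generated one.
inline constexpr auto kPromoteTable = makePromoteTable();

static_assert(
    kPromoteTable[static_cast<std::size_t>(ScalarType::Byte)]
                 [static_cast<std::size_t>(ScalarType::Float)] ==
        ScalarType::Float,
    "Byte promotes with Float to Float");
```

The generated table costs nothing at runtime: lookups index a constexpr array, and a mistake in the rule surfaces as a static_assert failure at build time.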

File tree: 50 files changed (+7852, -619 lines)


.ci/scripts/test_llama.sh

Lines changed: 36 additions & 6 deletions

@@ -9,11 +9,41 @@ set -exu
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-MODEL_NAME=$1 # stories110M
-BUILD_TOOL=$2 # buck2 or cmake
-DTYPE=$3 # fp16, bf16, or fp32
-MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
-UPLOAD_DIR=${5:-}
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -model)
+      MODEL_NAME="$2" # stories110M
+      shift 2
+      ;;
+    -build_tool)
+      BUILD_TOOL="$2" # buck2 or cmake
+      shift 2
+      ;;
+    -dtype)
+      DTYPE="$2" # fp16, bf16, or fp32
+      shift 2
+      ;;
+    -mode)
+      MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
+      shift 2
+      ;;
+    -upload)
+      UPLOAD_DIR="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1"
+      usage
+      ;;
+  esac
+done
+
+# Default mode to xnnpack+custom if not set
+MODE=${MODE:-"xnnpack+custom"}
+
+# Default UPLOAD_DIR to empty string if not set
+UPLOAD_DIR="${UPLOAD_DIR:-}"
+
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
   echo "Expecting atleast 4 positional arguments"
   echo "Usage: [...]"

@@ -150,7 +180,7 @@ cleanup_files() {
 }
 
 prepare_artifacts_upload() {
-  if [ -n "$UPLOAD_DIR" ]; then
+  if [ -n "${UPLOAD_DIR}" ]; then
    echo "Preparing for uploading generated artifacs"
    zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
    mkdir -p "${UPLOAD_DIR}"

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions

@@ -117,7 +117,7 @@ jobs:
       # Install requirements for export_llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
       # Test llama2
-      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}" "${ARTIFACTS_DIR_NAME}"
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
 
   test-llama-runner-linux-android:
     name: test-llama-runner-linux-android

@@ -393,7 +393,7 @@ jobs:
       # Install requirements for export_llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
       # Test llama2
-      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
 
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion

@@ -261,7 +261,7 @@ jobs:
       # Install requirements for export_llama
       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
       # Test llama2
-      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"
+      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
 
   # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
   # test-llava-runner-macos:

.gitmodules

Lines changed: 2 additions & 2 deletions

@@ -1,9 +1,9 @@
 [submodule "backends/arm/third-party/ethos-u-core-driver"]
 	path = backends/arm/third-party/ethos-u-core-driver
-	url = https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/
+	url = https://github.com/pytorch-labs/ethos-u-core-driver-mirror
 [submodule "backends/arm/third-party/serialization_lib"]
 	path = backends/arm/third-party/serialization_lib
-	url = https://git.mlplatform.org/tosa/serialization_lib.git/
+	url = https://github.com/pytorch-labs/tosa_serialization_lib-mirror
 [submodule "backends/vulkan/third-party/Vulkan-Headers"]
 	path = backends/vulkan/third-party/Vulkan-Headers
 	url = https://github.com/KhronosGroup/Vulkan-Headers

backends/cadence/README.md

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 ## Supported DSPs (in progress)
 - HiFi Audio
-- ...
+- Fusion G3
 
 ## Tutorial
 

backends/cadence/aot/TARGETS

Lines changed: 159 additions & 1 deletion

@@ -39,6 +39,7 @@ python_library(
         ":passes",
         ":utils",
         ":ops_registrations",
+        ":replace_ops",
         "//caffe2:torch",
         "//executorch/backends/cadence/aot/quantizer:fusion_pass",
         "//executorch/backends/cadence/aot/quantizer:quantizer",

@@ -74,12 +75,14 @@ python_library(
        ":utils",
        ":fuse_ops",
        ":simplify_ops",
+        ":replace_ops",
+        ":reorder_ops",
+        ":remove_ops",
        "//caffe2:torch",
        "//executorch/exir:pass_base",
        "//executorch/exir/dialects:lib",
        "//executorch/exir/passes:lib",
        "//executorch/exir/passes:spec_prop_pass",
-        "//executorch/backends/transforms:remove_clone_ops"
     ],
 )

@@ -180,6 +183,63 @@ python_library(
     ],
 )
 
+python_library(
+    name = "remove_ops",
+    srcs = [
+        "remove_ops.py",
+    ],
+    typing = True,
+    deps = [
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:simplify_ops",
+        "//executorch/exir:pass_base",
+        "//executorch/exir/dialects:lib",
+        "//executorch/exir/dialects/edge:lib",
+        "//executorch/exir/passes:spec_prop_pass",
+        "//executorch/backends/transforms:remove_clone_ops"
+    ],
+)
+
+python_library(
+    name = "reorder_ops",
+    srcs = [
+        "reorder_ops.py",
+    ],
+    typing = True,
+    deps = [
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler_utils",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:utils",
+        "//executorch/exir:pass_base",
+        "//executorch/exir:tensor",
+        "//executorch/exir/dialects:lib",
+        "//executorch/exir/dialects/edge:lib",
+    ],
+)
+
+python_library(
+    name = "replace_ops",
+    srcs = [
+        "replace_ops.py",
+    ],
+    typing = True,
+    deps = [
+        ":pass_utils",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler_utils",
+        "//executorch/backends/cadence/aot:fuse_ops",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:remove_ops",
+        "//executorch/backends/cadence/aot:utils",
+        "//executorch/exir:pass_base",
+        "//executorch/exir/dialects:lib",
+        "//executorch/exir/dialects/edge:lib",
+        "//executorch/exir/passes:spec_prop_pass",
+    ],
+)
+
 python_unittest(
     name = "test_graph_builder",
     srcs = [

@@ -196,3 +256,101 @@ python_unittest(
         ":ops_registrations"
     ],
 )
+
+python_unittest(
+    name = "test_replace_ops_passes",
+    srcs = [
+        "tests/test_replace_ops_passes.py",
+    ],
+    supports_static_listing = False,
+    typing = True,
+    deps = [
+        "fbsource//third-party/pypi/parameterized:parameterized",
+        ":compiler",
+        ":replace_ops",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:graph_builder",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/exir:pass_base",
+        "//executorch/exir/dialects:lib",
+        "//executorch/exir/passes:lib",
+    ],
+)
+
+python_unittest(
+    name = "test_fusion_ops_passes",
+    srcs = [
+        "tests/test_fusion_ops_passes.py",
+    ],
+    typing = True,
+    deps = [
+        ":compiler",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:fuse_ops",
+        "//executorch/backends/cadence/aot:graph_builder",
+        "//executorch/backends/cadence/aot:ops_registrations",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/exir/dialects:lib",
+        "//executorch/exir/dialects/edge:lib",
+    ],
+)
+
+python_unittest(
+    name = "test_remove_ops_passes",
+    srcs = [
+        "tests/test_remove_ops_passes.py",
+    ],
+    supports_static_listing = False,
+    typing = True,
+    deps = [
+        "fbsource//third-party/pypi/parameterized:parameterized",
+        "fbsource//third-party/pypi/pyre-extensions:pyre-extensions",
+        ":compiler",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:ops_registrations",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:remove_ops",
+        "//executorch/backends/cadence/aot/quantizer:quantizer",
+        "//executorch/exir/dialects:lib",
+    ],
+)
+
+python_unittest(
+    name = "test_simplify_ops_passes",
+    srcs = [
+        "tests/test_simplify_ops_passes.py",
+    ],
+    supports_static_listing = False,
+    typing = True,
+    deps = [
+        "fbsource//third-party/pypi/parameterized:parameterized",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:ops_registrations",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:simplify_ops",
+        "//executorch/exir/dialects:lib",
+    ],
+)
+
+python_unittest(
+    name = "test_reorder_ops_passes",
+    srcs = [
+        "tests/test_reorder_ops_passes.py",
+    ],
+    typing = True,
+    deps = [
+        ":compiler",
+        ":pass_utils",
+        "//caffe2:torch",
+        "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:fuse_ops",
+        "//executorch/backends/cadence/aot:ops_registrations",
+        "//executorch/backends/cadence/aot:pass_utils",
+        "//executorch/backends/cadence/aot:reorder_ops",
+        "//executorch/exir/dialects:lib",
+    ],
+)

backends/cadence/aot/compiler.py

Lines changed: 21 additions & 5 deletions

@@ -12,10 +12,10 @@
 
 import executorch.backends.cadence.aot.ops_registrations # noqa
 import torch
-
-from executorch.backends.cadence.aot.passes import ReplaceSafeSoftmaxWithSoftmax
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
 from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
+
+from executorch.backends.cadence.aot.replace_ops import ReplaceSafeSoftmaxWithSoftmax
 from executorch.backends.cadence.aot.utils import model_gm_has_SDPA, model_is_quantized
 from executorch.backends.transforms.decompose_sdpa import (
     DecomposeScaledDotProductAttention,

@@ -194,9 +194,6 @@ def export_to_edge(
     return edge_prog_manager
 
 
-# Export the model and lower it to an EdgeProgramManager (in edge IR), and
-# apply passes specific to Cadence DSP execution. Return both to print the
-# differences.
 def export_to_cadence(
     model: torch.nn.Module,
     inputs: tuple[object, ...],

@@ -216,6 +213,25 @@ def export_to_cadence(
     return cadence_prog_manager
 
 
+def quantize_and_export_to_cadence(
+    model: torch.nn.Module,
+    inputs: tuple[object, ...],
+    dump_graphs: bool = False,
+    opt_level: int = 1,
+) -> EdgeProgramManager:
+    quantized_model = quantize_pt2(model, inputs)
+
+    return export_to_cadence(
+        quantized_model,
+        inputs,
+        opt_level=opt_level,
+        dump_graphs=dump_graphs,
+    )
+
+
+# Export the model and lower it to an EdgeProgramManager (in edge IR), and
+# apply passes specific to Cadence DSP execution. Return both to print the
+# differences.
 def export_to_executorch_gen_etrecord(
     model: torch.nn.Module,
     inputs: tuple[object, ...],

backends/cadence/aot/functions.yaml

Lines changed: 10 additions & 0 deletions

@@ -77,6 +77,16 @@
     - arg_meta: null
       kernel_name: torch::executor::gelu_out
 
+- op: hardtanh.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::hardtanh_out
+
+- op: max_pool2d_with_indices.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::max_pool2d_with_indices_out
+
 - op: mean.out
   kernels:
     - arg_meta: null

backends/cadence/aot/functions_hifi.yaml

Lines changed: 17 additions & 2 deletions

@@ -62,11 +62,26 @@
     - arg_meta: null
       kernel_name: torch::executor::full_out
 
+- op: gelu.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::gelu_out
+
+- op: hardtanh.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::hardtanh_out
+
+- op: max_pool2d_with_indices.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::max_pool2d_with_indices_out
+
 - op: mean.out
   kernels:
     - arg_meta: null
-      kernel_name: cadence::impl::HiFi::mean_dim_out
-
+      kernel_name: cadence::impl::HiFi::mean_dim_out
+
 - op: mul.out
   kernels:
     - arg_meta: null
