Skip to content

Commit 4fa689d

Browse files
committed
Update
[ghstack-poisoned]
2 parents 22efe6f + 5dd2ed3 commit 4fa689d

File tree

115 files changed

+5020
-2164
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+5020
-2164
lines changed

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/bash
2+
# Copyright (c) Qualcomm Innovation Center, Inc.
3+
# All rights reserved
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
11+
12+
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
13+
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
14+
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
15+
export PYTHONPATH=".."
16+
cp schema/program.fbs exir/_serialize/program.fbs
17+
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
18+
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
19+
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
20+
21+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
22+
PYTHON_EXECUTABLE=python3
23+
fi
24+
25+
which "${PYTHON_EXECUTABLE}"
26+
27+
# Although static llama CI does not require graphviz, it is required by test_qnn_delegate.py
28+
pip install graphviz
29+
30+
# Download stories110M llama artifacts
31+
download_stories_model_artifacts
32+
echo "Creating tokenizer.bin"
33+
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
34+
35+
set +e
36+
# Compile only as weight sharing is not applicable on x86
37+
$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
38+
exit_code1=$?
39+
40+
# Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
41+
$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --enable_x86_64
42+
exit_code2=$?
43+
44+
# Check the exit codes and print messages
45+
if [ $exit_code1 -ne 0 ]; then
46+
echo "Static Llama compile only with weight sharing test failed. $exit_code1."
47+
fi
48+
49+
if [ $exit_code2 -ne 0 ]; then
50+
echo "Static Llama accuracy test failed. $exit_code2."
51+
fi
52+
53+
# Return failure if either program failed
54+
if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
55+
exit 1
56+
else
57+
exit 0
58+
fi
59+
set -e

.github/workflows/android-perf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ jobs:
222222
--preq_mode 8da4w_output_8da8w \
223223
--preq_group_size 32 \
224224
--max_seq_length 2048 \
225+
--max_context_length 2048 \
225226
--output_name "${OUT_ET_MODEL_NAME}.pte" \
226227
-kv \
227228
-d fp32 \
@@ -253,6 +254,7 @@ jobs:
253254
--xnnpack-extended-ops \
254255
-d fp32 \
255256
--max_seq_length 2048 \
257+
--max_context_length 2048 \
256258
--output_name "${OUT_ET_MODEL_NAME}.pte" \
257259
--metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
258260
ls -lh "${OUT_ET_MODEL_NAME}.pte"

.github/workflows/apple-perf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ jobs:
233233
--preq_mode 8da4w_output_8da8w \
234234
--preq_group_size 32 \
235235
--max_seq_length 2048 \
236+
--max_context_length 2048 \
236237
--output_name "${OUT_ET_MODEL_NAME}.pte" \
237238
-kv \
238239
-d fp32 \
@@ -264,6 +265,7 @@ jobs:
264265
--xnnpack-extended-ops \
265266
-d fp32 \
266267
--max_seq_length 2048 \
268+
--max_context_length 2048 \
267269
--output_name "${OUT_ET_MODEL_NAME}.pte" \
268270
--metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
269271
ls -lh "${OUT_ET_MODEL_NAME}.pte"

.github/workflows/pull.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,39 @@ jobs:
437437
# Test llama2
438438
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
439439
440+
test-static-llama-qnn-linux:
441+
name: test-static-llama-qnn-linux
442+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
443+
permissions:
444+
id-token: write
445+
contents: read
446+
strategy:
447+
fail-fast: false
448+
with:
449+
runner: linux.2xlarge
450+
docker-image: executorch-ubuntu-22.04-qnn-sdk
451+
submodules: 'true'
452+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
453+
timeout: 180
454+
script: |
455+
# The generic Linux job chooses to use base env, not the one setup by the image
456+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
457+
conda activate "${CONDA_ENV}"
458+
459+
BUILD_TOOL="cmake"
460+
461+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
462+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
463+
464+
# Setup executorch
465+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
466+
467+
# Setup install_requirements for llama
468+
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
469+
470+
# Test static llama weight sharing and accuracy
471+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
472+
440473
test-qnn-models-linux:
441474
name: test-qnn-models-linux
442475
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.lintrunner.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ exclude_patterns = [
7878
# File contains @generated
7979
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
8080
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
81+
# Want to be able to keep c10 in sync with PyTorch core.
82+
'runtime/core/portable_type/c10/**',
8183
]
8284
command = [
8385
'python',
@@ -261,6 +263,8 @@ exclude_patterns = [
261263
'extension/**',
262264
'kernels/optimized/**',
263265
'runtime/core/exec_aten/**',
266+
# Want to be able to keep c10 in sync with PyTorch core.
267+
'runtime/core/portable_type/c10/**',
264268
'runtime/executor/tensor_parser_aten.cpp',
265269
'scripts/**',
266270
'test/**',

CMakeLists.txt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch")
373373
"fix for this restriction."
374374
)
375375
endif()
376-
set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)
376+
set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type)
377377

378378
#
379379
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
@@ -556,6 +556,7 @@ endif()
556556
target_include_directories(
557557
executorch_core PUBLIC ${_common_include_directories}
558558
)
559+
target_compile_definitions(executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
559560
target_compile_options(executorch_core PUBLIC ${_common_compile_options})
560561
if(MAX_KERNEL_NUM)
561562
target_compile_definitions(
@@ -576,6 +577,7 @@ if(EXECUTORCH_BUILD_PYBIND AND APPLE)
576577
target_include_directories(
577578
executorch_core_shared PUBLIC ${_common_include_directories}
578579
)
580+
target_compile_definitions(executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
579581
target_compile_options(
580582
executorch_core_shared PUBLIC ${_common_compile_options}
581583
)
@@ -594,8 +596,9 @@ endif()
594596
# any backends.
595597
#
596598
add_library(executorch ${_executorch__srcs})
597-
target_link_libraries(executorch PRIVATE executorch_core)
599+
target_link_libraries(executorch PUBLIC executorch_core)
598600
target_include_directories(executorch PUBLIC ${_common_include_directories})
601+
target_compile_definitions(executorch PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
599602
target_compile_options(executorch PUBLIC ${_common_compile_options})
600603
target_link_options_shared_lib(executorch)
601604

@@ -629,6 +632,12 @@ endif()
629632

630633
# Install `executorch` library as well as `executorch-config.cmake` under
631634
# ${CMAKE_INSTALL_PREFIX}/
635+
install(DIRECTORY runtime/core/ DESTINATION include/executorch/runtime/core FILES_MATCHING PATTERN "*.h")
636+
install(DIRECTORY runtime/kernel/ DESTINATION include/executorch/runtime/kernel FILES_MATCHING PATTERN "*.h")
637+
install(DIRECTORY runtime/platform/ DESTINATION include/executorch/runtime/platform FILES_MATCHING PATTERN "*.h")
638+
install(DIRECTORY extension/kernel_util/ DESTINATION include/executorch/extension/kernel_util FILES_MATCHING PATTERN "*.h")
639+
install(DIRECTORY extension/tensor/ DESTINATION include/executorch/extension/tensor FILES_MATCHING PATTERN "*.h")
640+
install(DIRECTORY extension/threadpool/ DESTINATION include/executorch/extension/threadpool FILES_MATCHING PATTERN "*.h")
632641
install(
633642
TARGETS executorch executorch_core
634643
DESTINATION lib
@@ -792,6 +801,8 @@ if(EXECUTORCH_BUILD_PYBIND)
792801
target_include_directories(
793802
util PUBLIC ${_common_include_directories} ${TORCH_INCLUDE_DIRS}
794803
)
804+
target_compile_definitions(util PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
805+
795806
target_compile_options(util PUBLIC ${_pybind_compile_options})
796807
target_link_libraries(util PRIVATE torch c10 executorch extension_tensor)
797808

backends/apple/coreml/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ target_include_directories(
134134
coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util
135135
)
136136
target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/..)
137+
target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/runtime/core/portable_type)
138+
target_compile_definitions(coremldelegate PRIVATE C10_USING_CUSTOM_GENERATED_MACROS)
137139
target_link_libraries(coremldelegate PRIVATE executorch_core)
138140

139141
if(EXECUTORCH_BUILD_DEVTOOLS)

backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,7 @@
830830
GCC_OPTIMIZATION_LEVEL = 0;
831831
GCC_PREPROCESSOR_DEFINITIONS = (
832832
"DEBUG=1",
833+
"C10_USING_CUSTOM_GENERATED_MACROS",
833834
"$(inherited)",
834835
);
835836
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@@ -911,6 +912,7 @@
911912
DEVELOPMENT_TEAM = "";
912913
GCC_PREPROCESSOR_DEFINITIONS = (
913914
"DEBUG=1",
915+
"C10_USING_CUSTOM_GENERATED_MACROS",
914916
"ET_EVENT_TRACER_ENABLED=1",
915917
"$(inherited)",
916918
);
@@ -920,6 +922,7 @@
920922
"$(SRCROOT)/../kvstore",
921923
"$(SRCROOT)/../inmemoryfs",
922924
"$(SRCROOT)/../include",
925+
"$(SRCROOT)/../include/executorch/runtime/core/portable_type",
923926
"$(SRCROOT)/../sdk",
924927
"$(SRCROOT)/../util",
925928
"$(SRCROOT)/../../third-party/nlohmann_json/single_include",
@@ -951,6 +954,7 @@
951954
"$(SRCROOT)/../kvstore",
952955
"$(SRCROOT)/../inmemoryfs",
953956
"$(SRCROOT)/../include",
957+
"$(SRCROOT)/../include/executorch/runtime/core/portable_type",
954958
"$(SRCROOT)/../sdk",
955959
"$(SRCROOT)/../util",
956960
"$(SRCROOT)/../../third-party/nlohmann_json/single_include",

backends/arm/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ endif()
1414

1515
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
1616

17-
set(_common_include_directories ${EXECUTORCH_ROOT}/..)
17+
set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type)
18+
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
1819

1920
# Third-party folder and Ethos-U driver included
2021
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")

backends/arm/arm_partitioner.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77

88
import logging
99
import os
10-
from typing import Callable, final, List, Optional, Tuple
10+
from typing import Callable, final, List, Optional, Sequence, Tuple
1111

1212
import torch
1313
from executorch.backends.arm.arm_backend import ( # type: ignore[attr-defined]
1414
ArmBackend,
1515
) # usort: skip
1616
from executorch.backends.arm.operator_support.tosa_supported_operators import (
17-
TOSASupportedOperators,
17+
tosa_support_factory,
1818
)
1919
from executorch.backends.arm.tosa_specification import TosaSpecification
2020
from executorch.exir.backend.compile_spec_schema import CompileSpec
@@ -27,6 +27,8 @@
2727
from executorch.exir.dialects._ops import ops as exir_ops
2828
from torch.export.exported_program import ExportedProgram
2929
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
30+
from torch.fx.passes.operator_support import OperatorSupportBase
31+
3032

3133
logger = logging.getLogger(__name__)
3234
logger.setLevel(logging.WARNING)
@@ -54,8 +56,13 @@ def is_dequant_node(node: torch.fx.node.Node) -> bool:
5456

5557
@final
5658
class ArmPartitioner(Partitioner):
57-
def __init__(self, compile_spec: List[CompileSpec]) -> None:
59+
def __init__(
60+
self,
61+
compile_spec: List[CompileSpec],
62+
additional_checks: Optional[Sequence[OperatorSupportBase]] = None,
63+
) -> None:
5864
self.delegation_spec = DelegationSpec(ArmBackend.__name__, compile_spec)
65+
self.additional_checks = additional_checks
5966

6067
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
6168
# Run the CapabilityBasedPartitioner to return the largest possible
@@ -72,7 +79,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
7279

7380
capability_partitioner = CapabilityBasedPartitioner(
7481
exported_program.graph_module,
75-
TOSASupportedOperators(tosa_spec),
82+
tosa_support_factory(tosa_spec, self.additional_checks),
7683
allows_single_node_partition=True,
7784
)
7885
partition_list = capability_partitioner.propose_partitions()

backends/arm/operator_support/convolution_support.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class ConvolutionSupported(SupportedTOSAOperatorCheck):
2424
TosaSpecification.create_from_string("TOSA-0.80+MI"),
2525
]
2626

27-
def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
27+
def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
2828

2929
# Not implemented
3030
transposed = cast(bool, node.args[6])

backends/arm/operator_support/pool_2d_support.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class AvgPool2dSupported(SupportedTOSAOperatorCheck):
4343
TosaSpecification.create_from_string("TOSA-0.80+MI"),
4444
]
4545

46-
def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
46+
def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
4747
if not (isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset):
4848
return True
4949

@@ -73,7 +73,7 @@ class MaxPool2dSupported(SupportedTOSAOperatorCheck):
7373
TosaSpecification.create_from_string("TOSA-0.80+MI"),
7474
]
7575

76-
def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
76+
def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
7777
if not (isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset):
7878
return True
7979

backends/arm/operator_support/reduce_sum_support.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class SumSupported(SupportedTOSAOperatorCheck):
2323
TosaSpecification.create_from_string("TOSA-0.80+MI"),
2424
]
2525

26-
def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
26+
def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
2727
if not (isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset):
2828
return True
2929

backends/arm/operator_support/right_shift_support.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2024 Arm Limited and/or its affiliates.
1+
# Copyright 2024-2025 Arm Limited and/or its affiliates.
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -29,7 +29,7 @@ class RightShiftSupported(SupportedTOSAOperatorCheck):
2929
TosaSpecification.create_from_string("TOSA-0.80+MI"),
3030
]
3131

32-
def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
32+
def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
3333

3434
# TODO MLETORCH-525 Remove warning
3535
if isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset:

backends/arm/operator_support/to_copy_support.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ def _merge_supported_types(
7070
)
7171
POSSIBLE_TYPE_CONVERSIONS = {torch.int64: torch.int32}
7272

73-
def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification) -> bool:
73+
def is_node_tosa_supported(
74+
self, node: fx.Node, tosa_spec: TosaSpecification
75+
) -> bool:
7476
assert node.target in self.targets
7577

7678
if tosa_spec not in self.tosa_specs:

0 commit comments

Comments
 (0)