
Commit e1862fc

Update on "stop double-installing ExecuTorch in one-off linux jobs"
setup-linux.sh already installs ExecuTorch with XNNPACK (and it passes use-pt-pinned-commit, as it should).

Differential Revision: [D67996460](https://our.internmc.facebook.com/intern/diff/D67996460/)

[ghstack-poisoned]

2 parents: d09a372 + 93fcdc2


123 files changed: +3944 additions, −3905 deletions


.github/pytorch-probot.yml

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 # The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml
+tracking_issue: 7679
 ciflow_push_tags:
 - ciflow/android
 - ciflow/apple

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
@@ -260,7 +260,7 @@ jobs:
           --output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-          export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
+          export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
           export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
           export PYTHONPATH=$(pwd)/..

@@ -347,7 +347,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh

         export ANDROID_ABIS="arm64-v8a"
-        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
+        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}

   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   benchmark-on-device:

.github/workflows/pull.yml

Lines changed: 22 additions & 0 deletions
@@ -330,6 +330,9 @@ jobs:

   unittest-arm:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -392,6 +395,25 @@ jobs:
       # Test llama2
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

+  test-qnn-models-linux:
+    name: test-qnn-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 180
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for running test_qnn_delegate.py, can use matrix such that we can trigger different jobs, refers to test-llama-runner-qnn-linux
+        # reminder: make sure each job runs fast
+
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main

.github/workflows/trunk.yml

Lines changed: 6 additions & 0 deletions
@@ -132,6 +132,9 @@ jobs:
   test-arm-backend-delegation:
     name: test-arm-backend-delegation
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -159,6 +162,9 @@ jobs:
   test-arm-reference-delegation:
     name: test-arm-reference-delegation
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

.lintrunner.toml

Lines changed: 1 addition & 4 deletions
@@ -78,8 +78,6 @@ exclude_patterns = [
   # File contains @generated
   'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
   'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
-  # Want to be able to keep c10 in sync with PyTorch core.
-  'runtime/core/portable_type/c10/**',
 ]
 command = [
   'python',

@@ -263,8 +261,6 @@ exclude_patterns = [
   'extension/**',
   'kernels/optimized/**',
   'runtime/core/exec_aten/**',
-  # Want to be able to keep c10 in sync with PyTorch core.
-  'runtime/core/portable_type/c10/**',
   'runtime/executor/tensor_parser_aten.cpp',
   'scripts/**',
   'test/**',

@@ -298,6 +294,7 @@ include_patterns = [
   'build/**/*.py',
   'codegen/**/*.py',
   # 'devtools/**/*.py',
+  'devtools/visualization/**/*.py',
   'docs/**/*.py',
   # 'examples/**/*.py',
   # 'exir/**/*.py',

CMakeLists.txt

Lines changed: 1 addition & 12 deletions
@@ -361,7 +361,7 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch")
     "fix for this restriction."
   )
 endif()
-set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type)
+set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)

 #
 # The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.

@@ -544,7 +544,6 @@ endif()
 target_include_directories(
   executorch_core PUBLIC ${_common_include_directories}
 )
-target_compile_definitions(executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
 target_compile_options(executorch_core PUBLIC ${_common_compile_options})
 if(MAX_KERNEL_NUM)
   target_compile_definitions(

@@ -565,7 +564,6 @@ if(EXECUTORCH_BUILD_PYBIND AND APPLE)
   target_include_directories(
     executorch_core_shared PUBLIC ${_common_include_directories}
   )
-  target_compile_definitions(executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
   target_compile_options(
     executorch_core_shared PUBLIC ${_common_compile_options}
   )

@@ -586,7 +584,6 @@ endif()
 add_library(executorch ${_executorch__srcs})
 target_link_libraries(executorch PRIVATE executorch_core)
 target_include_directories(executorch PUBLIC ${_common_include_directories})
-target_compile_definitions(executorch PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
 target_compile_options(executorch PUBLIC ${_common_compile_options})
 target_link_options_shared_lib(executorch)

@@ -620,12 +617,6 @@ endif()

 # Install `executorch` library as well as `executorch-config.cmake` under
 # ${CMAKE_INSTALL_PREFIX}/
-install(DIRECTORY runtime/core/ DESTINATION include/executorch/runtime/core FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY runtime/kernel/ DESTINATION include/executorch/runtime/kernel FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY runtime/platform/ DESTINATION include/executorch/runtime/platform FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY extension/kernel_util/ DESTINATION include/executorch/extension/kernel_util FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY extension/tensor/ DESTINATION include/executorch/extension/tensor FILES_MATCHING PATTERN "*.h")
-install(DIRECTORY extension/threadpool/ DESTINATION include/executorch/extension/threadpool FILES_MATCHING PATTERN "*.h")
 install(
   TARGETS executorch executorch_core
   DESTINATION lib

@@ -784,8 +775,6 @@ if(EXECUTORCH_BUILD_PYBIND)
   target_include_directories(
     util PUBLIC ${_common_include_directories} ${TORCH_INCLUDE_DIRS}
   )
-  target_compile_definitions(util PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
-
   target_compile_options(util PUBLIC ${_pybind_compile_options})
   target_link_libraries(util PRIVATE torch c10 executorch extension_tensor)

backends/apple/coreml/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
@@ -134,8 +134,6 @@ target_include_directories(
   coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util
 )
 target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/..)
-target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/runtime/core/portable_type)
-target_compile_definitions(coremldelegate PRIVATE C10_USING_CUSTOM_GENERATED_MACROS)
 target_link_libraries(coremldelegate PRIVATE executorch_core)

 if(EXECUTORCH_BUILD_DEVTOOLS)

backends/apple/coreml/README.md

Lines changed: 3 additions & 3 deletions
@@ -93,14 +93,14 @@ class Model(torch.nn.Module):
 source_model = Model()
 example_inputs = (torch.randn((1, 3, 256, 256)), )

-pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
+pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()

 quantization_config = LinearQuantizerConfig.from_dict(
     {
         "global_config": {
             "quantization_scheme": QuantizationScheme.symmetric,
-            "activation_dtype": torch.uint8,
-            "weight_dtype": torch.int8,
+            "activation_dtype": torch.quint8,
+            "weight_dtype": torch.qint8,
             "weight_per_channel": True,
         }
     }
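
Putting the three fixes together, the corrected README example reads as below. This is a minimal sketch: the import locations (torch.export.export_for_training and coremltools.optimize.torch.quantization) are assumed from the usual APIs and are not shown in this hunk.

import torch
from torch.export import export_for_training  # assumed import location
from coremltools.optimize.torch.quantization import (  # assumed import location
    LinearQuantizerConfig,
    QuantizationScheme,
)

source_model = Model()  # the README's example torch.nn.Module
example_inputs = (torch.randn((1, 3, 256, 256)), )

# Export the model that was actually constructed; the old text passed the
# undefined name `model`.
pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()

# The quantized dtypes are the torch.q* variants, not plain torch.uint8/int8.
quantization_config = LinearQuantizerConfig.from_dict(
    {
        "global_config": {
            "quantization_scheme": QuantizationScheme.symmetric,
            "activation_dtype": torch.quint8,
            "weight_dtype": torch.qint8,
            "weight_per_channel": True,
        }
    }
)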

backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj

Lines changed: 0 additions & 4 deletions
@@ -830,7 +830,6 @@
     GCC_OPTIMIZATION_LEVEL = 0;
     GCC_PREPROCESSOR_DEFINITIONS = (
       "DEBUG=1",
-      "C10_USING_CUSTOM_GENERATED_MACROS",
       "$(inherited)",
     );
     GCC_WARN_64_TO_32_BIT_CONVERSION = YES;

@@ -912,7 +911,6 @@
     DEVELOPMENT_TEAM = "";
     GCC_PREPROCESSOR_DEFINITIONS = (
       "DEBUG=1",
-      "C10_USING_CUSTOM_GENERATED_MACROS",
       "ET_EVENT_TRACER_ENABLED=1",
       "$(inherited)",
     );

@@ -922,7 +920,6 @@
     "$(SRCROOT)/../kvstore",
     "$(SRCROOT)/../inmemoryfs",
     "$(SRCROOT)/../include",
-    "$(SRCROOT)/../include/executorch/runtime/core/portable_type",
     "$(SRCROOT)/../sdk",
     "$(SRCROOT)/../util",
     "$(SRCROOT)/../../third-party/nlohmann_json/single_include",

@@ -954,7 +951,6 @@
     "$(SRCROOT)/../kvstore",
     "$(SRCROOT)/../inmemoryfs",
     "$(SRCROOT)/../include",
-    "$(SRCROOT)/../include/executorch/runtime/core/portable_type",
     "$(SRCROOT)/../sdk",
     "$(SRCROOT)/../util",
     "$(SRCROOT)/../../third-party/nlohmann_json/single_include",

backends/arm/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
@@ -14,8 +14,7 @@ endif()

 include(${EXECUTORCH_ROOT}/build/Utils.cmake)

-set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type)
-add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)

 # Third-party folder and Ethos-U driver inclued
 set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")

backends/arm/_passes/arm_pass_manager.py

Lines changed: 63 additions & 52 deletions
@@ -7,7 +7,6 @@

 # pyre-unsafe

-import torch
 from executorch.backends.arm._passes.annotate_channels_last_dim_order_pass import (
     AnnotateChannelsLastDimOrder,
 )

@@ -47,7 +46,7 @@
 )
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
 from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
-    ConvertMeanDimToAveragePool,
+    ConvertMeanDimToAveragePoolPass,
 )
 from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass
 from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass

@@ -61,86 +60,98 @@
 from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
     UnsqueezeScalarPlaceholdersPass,
 )
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
-from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_manager import PassManager
+from torch.fx import GraphModule


 class ArmPassManager(PassManager):

-    def _transform(self, graph_module: torch.fx.GraphModule):
+    def __init__(self, tosa_spec: TosaSpecification) -> None:
+        self.tosa_spec = tosa_spec
+        super().__init__()
+
+    def _transform(self, graph_module: GraphModule):
         return self(graph_module).graph_module

-    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
-        """Apply passes before transforming program to backend"""
+    def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(FuseQuantizedActivationPass())
+        self.add_pass(RemoveGetItemPass())
+        self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(ConvertMeanDimToAveragePoolPass())
+
+        self.add_pass(AnnotateDecomposedMatmulPass())
+        self.add_pass(QuantizeFullArgument())
+        self.add_pass(FoldAndAnnotateQParamsPass())
+        self.add_pass(RetraceFoldedDtypesPass())
+        self.add_pass(InsertTableOpsPass(exported_program))
+
+        self.add_pass(RemoveClonePass())
+        self.add_pass(SizeAdjustConv2DPass())
+        self.add_pass(ConvertExpandCopyToRepeatPass())
+        self.add_pass(UnsqueezeBeforeRepeatPass())
+        self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
+        self.add_pass(CastInt64ToInt32Pass(exported_program))
+        self.add_pass(MatchArgRanksPass(exported_program))
+        self.add_pass(KeepDimsFalseToSqueezePass())
+        self.add_pass(Conv1dUnsqueezePass(exported_program))
+        self.add_pass(DecomposeSelectPass())
+
+        self.add_pass(AnnotateChannelsLastDimOrder())
+
+        return self._transform(exported_program.graph_module)
+
+    def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+
+        self.add_pass(FuseQuantizedActivationPass())
         self.add_pass(RemoveGetItemPass())
+        self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
+        self.add_pass(DecomposeLinearPass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
-        self.add_pass(ConvertMeanDimToAveragePool())
         self.add_pass(DecomposeMeanDimPass())
-        self.add_pass(ConvertSplitToSlicePass())
-        self.add_pass(ConvertMmToBmmPass())
-        # TODO MLETORCH-558
+        self.add_pass(ConvertMeanDimToAveragePoolPass())
+        self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeSoftmaxesPass())
+
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeFullArgument())
-        self.add_pass(
-            FoldAndAnnotateQParamsPass(
-                [
-                    exir_ops.edge.aten.minimum.default,
-                    exir_ops.edge.aten.maximum.default,
-                    exir_ops.edge.aten.add.Tensor,
-                    exir_ops.edge.aten.avg_pool2d.default,
-                    exir_ops.edge.aten.bmm.default,
-                    exir_ops.edge.aten.cat.default,
-                    exir_ops.edge.aten.convolution.default,
-                    exir_ops.edge.aten.clone.default,
-                    exir_ops.edge.aten.exp.default,
-                    exir_ops.edge.aten.expand_copy.default,
-                    exir_ops.edge.aten.full.default,
-                    exir_ops.edge.aten.hardtanh.default,
-                    exir_ops.edge.aten.log.default,
-                    exir_ops.edge.aten.max_pool2d.default,
-                    exir_ops.edge.aten.mul.Tensor,
-                    exir_ops.edge.aten.permute_copy.default,
-                    exir_ops.edge.aten.reciprocal.default,
-                    exir_ops.edge.aten.relu.default,
-                    exir_ops.edge.aten.repeat.default,
-                    exir_ops.edge.aten.rsqrt.default,
-                    exir_ops.edge.aten.select_copy.int,
-                    exir_ops.edge.aten.sigmoid.default,
-                    exir_ops.edge.aten.slice_copy.Tensor,
-                    exir_ops.edge.aten.squeeze_copy.dims,
-                    exir_ops.edge.aten.sub.Tensor,
-                    exir_ops.edge.aten.sum.dim_IntList,
-                    exir_ops.edge.aten.tanh.default,
-                    exir_ops.edge.aten.unsqueeze_copy.default,
-                    exir_ops.edge.aten.upsample_nearest2d.vec,
-                    exir_ops.edge.aten.view_copy.default,
-                ]
-            )
-        )
+        self.add_pass(FoldAndAnnotateQParamsPass())
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(InsertTableOpsPass(exported_program))
+
+        self.add_pass(RemoveClonePass())
+        self.add_pass(SizeAdjustConv2DPass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
         self.add_pass(UnsqueezeBeforeRepeatPass())
-        self.add_pass(CastInt64ToInt32Pass(exported_program))
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
-        self.add_pass(SizeAdjustConv2DPass())
-        self.add_pass(RemoveClonePass())
+        self.add_pass(CastInt64ToInt32Pass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
-        self.add_pass(DecomposeDivPass())
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
-        self.add_pass(DecomposeSoftmaxesPass())
         self.add_pass(DecomposeSelectPass())
+
         self.add_pass(AnnotateChannelsLastDimOrder())

         return self._transform(exported_program.graph_module)

-    def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
+    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
+        """Apply passes before transforming program to backend"""
+        if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
+            return self._tosa_080_BI_pipeline(exported_program)
+        elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
+            return self._tosa_080_MI_pipeline(exported_program)
+        else:
+            raise NotImplementedError(
+                f"No pass pipeline implemented for {self.tosa_spec=}"
+            )
+
+    def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
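
The net effect of this refactor is that ArmPassManager is now constructed with a TOSA specification and selects the matching pipeline itself. A minimal usage sketch, based only on the signatures visible in this diff (`exported_program` stands in for an ExportedProgram produced by the Arm backend's export flow and is assumed to exist):

from executorch.backends.arm._passes.arm_pass_manager import ArmPassManager
from executorch.backends.arm.tosa_specification import TosaSpecification

# "TOSA-0.80.0+BI" selects the base-inference pipeline; "TOSA-0.80.0+MI"
# selects main-inference. Any other spec raises NotImplementedError.
tosa_spec = TosaSpecification.create_from_string("TOSA-0.80.0+BI")

pass_manager = ArmPassManager(tosa_spec)

# Dispatches to _tosa_080_BI_pipeline and returns the transformed GraphModule.
graph_module = pass_manager.transform_to_backend_pipeline(exported_program)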
