Commit 4372d61

Update on "Add new export LLM config"
Differential Revision: [D75263991](https://our.internmc.facebook.com/intern/diff/D75263991) [ghstack-poisoned]
2 parents: 0bf2ea4 + dfbb585

260 files changed: +8436 additions, -2547 deletions


.ci/scripts/build-qnn-sdk.sh

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,8 @@ set_up_aot() {
     -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
     -DEXECUTORCH_BUILD_DEVTOOLS=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
     -DPYTHON_EXECUTABLE=python3
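
For local reproduction, a minimal sketch of the updated QNN AOT configure step with the two newly added extension flags; the flags mirror the script above, while the build directory name and the parallel build invocation are assumptions. The same -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON addition recurs in the other CI scripts in this commit.

  # Sketch only: configure ExecuTorch for the QNN AOT flow with the new flags.
  cmake -S . -B build-qnn-aot \
      -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
      -DEXECUTORCH_BUILD_DEVTOOLS=ON \
      -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
      -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
      -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
      -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
      -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
      -DPYTHON_EXECUTABLE=python3
  cmake --build build-qnn-aot -j"$(nproc)"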

.ci/scripts/build_llama_android.sh

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ build_llama_runner() {
     popd
     ANDROID_ABI=arm64-v8a
     cmake -DBUCK2="${BUCK2}" \
+        -DBUILD_TESTING=OFF \
         -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI="${ANDROID_ABI}" \
         -DCMAKE_INSTALL_PREFIX=cmake-android-out \

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 0 deletions
@@ -169,6 +169,7 @@ cmake_build_llama_runner() {
     popd
     dir="examples/models/llama"
     retry cmake \
+        -DBUILD_TESTING=OFF \
        -DCMAKE_INSTALL_PREFIX=cmake-out \
        -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
        -Bcmake-out/${dir} \

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 2 additions & 0 deletions
@@ -30,6 +30,7 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=OFF \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
@@ -40,6 +41,7 @@ cmake --build cmake-out -j16 --target install --config Release
 
 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
+    -DBUILD_TESTING=OFF \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 3 additions & 1 deletion
@@ -37,6 +37,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
@@ -64,9 +65,10 @@ cmake_install_executorch_libraries_for_android() {
 
 
 LLAVA_COMMON_CMAKE_ARGS=" \
+    -DBUILD_TESTING=OFF \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON"

.ci/scripts/test_phi_3_mini.sh

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ cmake_install_executorch_libraries() {
     -DEXECUTORCH_ENABLE_LOGGING=1 \
     -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
@@ -371,7 +371,7 @@ jobs:
           size=${arr[4]}
           # threshold=48120 on devserver with gcc11.4
           # todo(lfq): update once binary size is below 50kb.
-          threshold="55504"
+          threshold="55584"
           if [[ "$size" -le "$threshold" ]]; then
             echo "Success $size <= $threshold"
           else
@@ -406,7 +406,7 @@ jobs:
           output=$(ls -la cmake-out/test/size_test)
           arr=($output)
           size=${arr[4]}
-          threshold="51656"
+          threshold="51728"
           if [[ "$size" -le "$threshold" ]]; then
             echo "Success $size <= $threshold"
          else
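
The size gate above can be approximated locally with the same commands the workflow runs; the binary path and the `ls -la` parsing are taken from the step itself, while running it as a standalone snippet is an assumption.

  # Sketch only: check the size_test binary against the updated threshold.
  output=$(ls -la cmake-out/test/size_test)
  arr=($output)
  size=${arr[4]}
  threshold="51728"
  if [[ "$size" -le "$threshold" ]]; then
    echo "Success $size <= $threshold"
  else
    echo "Fail $size > $threshold"
    exit 1
  fi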

.github/workflows/trunk.yml

Lines changed: 31 additions & 1 deletion
@@ -262,7 +262,7 @@ jobs:
           output=$(ls -la ${elf})
           arr=($output)
           size=${arr[4]}
-          threshold="102400" # 100KiB
+          threshold="103068" # ~100KiB
           echo "size: $size, threshold: $threshold"
           if [[ "$size" -le "$threshold" ]]; then
             echo "Success $size <= $threshold"
@@ -552,6 +552,7 @@ jobs:
             -DEXECUTORCH_ENABLE_LOGGING=1 \
             -DCMAKE_BUILD_TYPE=Release \
             -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+            -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
             -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
             -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
             -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -686,3 +687,32 @@ jobs:
       build-mode: Release
       build-tool: cmake
       docker-image: executorch-ubuntu-22.04-clang12
+
+  unittest-nxp-neutron:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Build and install Executorch
+        PYTHON_EXECUTABLE=python \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
+        .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Install test requirements
+        pip install -r backends/nxp/requirements-tests.txt
+
+        # Run pytest
+        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
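
The new unittest-nxp-neutron job can be approximated locally with the commands it runs; this sketch assumes an existing Python environment and an ExecuTorch checkout with submodules initialized, and otherwise reuses the workflow's own steps.

  # Sketch only: build with the NXP Neutron backend enabled and run its unit tests.
  PYTHON_EXECUTABLE=python \
  CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
  .ci/scripts/setup-linux.sh --build-tool "cmake"

  pip install -r backends/nxp/requirements-tests.txt
  PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh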

.lintrunner.toml

Lines changed: 1 addition & 7 deletions
@@ -386,15 +386,9 @@ exclude_patterns = [
   "third-party/**",
   # TODO: remove exceptions as we migrate
   # backends
-  "backends/vulkan/quantizer/**",
-  "backends/vulkan/test/**",
-  "backends/xnnpack/quantizer/**",
-  "backends/xnnpack/test/**",
-  "exir/tests/test_passes.py",
-  "extension/llm/export/builder.py",
-  "extension/llm/export/quantizer_lib.py",
   "exir/tests/test_memory_planning.py",
   "exir/backend/test/demos/test_xnnpack_qnnpack.py",
+  "backends/xnnpack/test/test_xnnpack_utils.py",
 ]
 
 command = [
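
With these exclude patterns removed, the corresponding files are now covered by the linter. A hedged example of checking two of them locally, assuming lintrunner is already installed and initialized for this repo:

  # Sketch only: lint files that were previously excluded.
  lintrunner extension/llm/export/builder.py extension/llm/export/quantizer_lib.py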

CMakeLists.txt

Lines changed: 6 additions & 31 deletions
@@ -75,9 +75,13 @@ if(NOT PYTHON_EXECUTABLE)
 endif()
 announce_configured_options(PYTHON_EXECUTABLE)
 
+if(NOT BUCK2)
+  resolve_buck2()
+endif()
+announce_configured_options(BUCK2)
+
 announce_configured_options(CMAKE_CXX_COMPILER_ID)
 announce_configured_options(CMAKE_TOOLCHAIN_FILE)
-announce_configured_options(BUCK2)
 
 load_build_preset()
 include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
@@ -148,37 +152,11 @@ else()
 endif()
 
 if(EXECUTORCH_BUILD_TESTS)
-  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
   include(CTest)
 endif()
 
 add_subdirectory(third-party)
 
-if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
-  set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
-  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
-  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
-  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
-endif()
-
-if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
-  set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
-endif()
-
-if(EXECUTORCH_BUILD_EXTENSION_MODULE)
-  set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
-  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
-endif()
-
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
-  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
-  set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
-endif()
-
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
-  set(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
-endif()
-
 if(NOT DEFINED FXDIV_SOURCE_DIR)
   set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
       ${CMAKE_POSITION_INDEPENDENT_CODE}
@@ -290,9 +268,6 @@ set(_common_include_directories
 #
 
 if(NOT EXECUTORCH_SRCS_FILE)
-  # Find or download buck2 binary.
-  resolve_buck2()
-
   # A file wasn't provided. Run a script to extract the source lists from the
   # buck2 build system and write them to a file we can include.
   #
@@ -335,7 +310,7 @@ if(EXECUTORCH_USE_CPP_CODE_COVERAGE)
       " -fprofile-instr-generate -fcoverage-mapping"
     )
   else()
-    message(ERROR
+    message(FATAL_ERROR
      "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported"
    )
  endif()
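
Note that the removed block used to turn related options on implicitly (for example, enabling EXECUTORCH_BUILD_EXTENSION_MODULE previously forced EXECUTORCH_BUILD_EXTENSION_DATA_LOADER and EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR on). The CI scripts in this commit now pass those flags explicitly, and builds that relied on the implicit behavior may need to do the same; a hedged sketch, with this particular flag set chosen only as an example:

  # Sketch only: enable the extension libraries explicitly at configure time.
  cmake -S . -B cmake-out \
      -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
      -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
      -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
      -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON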

backends/arm/_passes/annotate_channels_last_dim_order_pass.py

Lines changed: 4 additions & 1 deletion
@@ -35,7 +35,10 @@
 def _transpose_impl(*args, **kwargs):
     # Validate length of dim_order array
     dim = args[1]
-    assert len(dim) in (4, 5)
+    if len(dim) != 4 and len(dim) != 5:
+        raise ValueError(
+            f"Dim order length must be either 4 or 5, got {len(dim)}: {dim}"
+        )
     # Pass-through in edge-IR
     return args[0]
 

backends/arm/_passes/convert_split_to_slice.py

Lines changed: 8 additions & 3 deletions
@@ -41,9 +41,14 @@ def call(self, graph_module: torch.fx.GraphModule):
             dim = split_node.args[2] if len(split_node.args) > 2 else 0
             dim = (dim + rank) % rank
 
-            assert (
-                sum(split_lengths) == shape[dim]
-            ), "Given split lengths don't sum up to the size of the dimension."
+            # Validate that split lengths cover the entire dimension
+            length_sum = sum(split_lengths)
+            dim_size = shape[dim]
+            if length_sum != dim_size:
+                raise ValueError(
+                    f"Split sizes {split_lengths} sum to {length_sum}, "
+                    f"but dimension {dim} has size {dim_size}"
+                )
 
             # Convert split argument 'split_lengths' to slice arguments start and end.
             starts = [0] * len(split_lengths)

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 13 additions & 9 deletions
@@ -120,7 +120,9 @@ def fold_and_annotate_arg(
     if input_qparams is not None:
         node.meta["input_qparams"][i] = input_qparams
     for n in nodes_to_remove:
-        assert n.target == dq_op
+        if n.target != dq_op:
+            raise RuntimeError(f"Expected {dq_op} dq_op, got {n.target}")
+
         n.replace_all_uses_with(n.args[0])  # type: ignore[arg-type]
         graph_module.graph.erase_node(n)
 
@@ -136,14 +138,16 @@ def call(self, graph_module: GraphModule) -> PassResult:
                 continue
 
             # Make sure we haven't already set qparams meta information on the node
-            assert "input_qparams" not in n.meta, (
-                f'Unexpected key "input_qparams" found in meta for node {n}. '
-                "input_qparams should not have been set at this point"
-            )
-            assert "output_qparams" not in n.meta, (
-                f'Unexpected key "output_qparams" found in meta for node {n}. '
-                "output_qparams should not have been set at this point"
-            )
+            if "input_qparams" in n.meta:
+                raise RuntimeError(
+                    f'Unexpected key "input_qparams" found in meta for node {n}. '
+                    "input_qparams should not have been set at this point"
+                )
+            if "output_qparams" in n.meta:
+                raise RuntimeError(
+                    f'Unexpected key "output_qparams" found in meta for node {n}. '
+                    "output_qparams should not have been set at this point"
+                )
 
             # for the inputs and outputs search the graph for quantization info and
             # store the information in a dict with order of the _tensor_ inputs as key,

backends/arm/_passes/insert_table_ops.py

Lines changed: 11 additions & 2 deletions
@@ -240,8 +240,17 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     args=(node.args[0],),
                 )
                 output_node = table_node
-                assert len(input_qparams) == 1
-                assert len(output_qparams) == 1
+                # Expect exactly one quantization parameter for input and output
+                if len(input_qparams) != 1:
+                    raise ValueError(
+                        f"InsertTableOpsPass expected exactly one input quantization parameter, "
+                        f"got {len(input_qparams)} for node {node.name}"
+                    )
+                if len(output_qparams) != 1:
+                    raise ValueError(
+                        f"InsertTableOpsPass expected exactly one output quantization parameter, "
+                        f"got {len(output_qparams)} for node {node.name}"
+                    )
 
                 # Generate table buffer and how much to lshift the table output.
                 buffer, lshift = self.generate_table_values(

backends/arm/_passes/remove_clone_pass.py

Lines changed: 4 additions & 1 deletion
@@ -17,5 +17,8 @@ def call_operator(self, op, args, kwargs, meta):
         if op != exir_ops.edge.aten.clone.default:
             return super().call_operator(op, args, kwargs, meta)
 
-        assert len(args) == 1
+        if len(args) != 1:
+            raise ValueError(
+                f"clone operator expects exactly one argument, got {len(args)}"
+            )
         return args[0]

backends/arm/operators/op_abs.py

Lines changed: 4 additions & 4 deletions
@@ -44,7 +44,7 @@ def define_node(
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 1)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         # Handle int8 (quantized) and int32
         if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
@@ -106,7 +106,7 @@ def define_node(
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 1)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
             # Call the inherited define_node for handling integers
@@ -153,7 +153,7 @@ def define_node(
         import serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 1)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         # Handle int8 (quantized) and int32
         if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
@@ -216,7 +216,7 @@ def define_node(
         import serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 1)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
             # Call the inherited define_node for handling integers

backends/arm/operators/op_add.py

Lines changed: 4 additions & 4 deletions
@@ -45,7 +45,7 @@ def define_node(
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 2)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         # Handle int8 (quantized) and int32
         supported_dtypes = [ts.DType.INT8, ts.DType.INT32]
@@ -118,7 +118,7 @@ def define_node(
         import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 2)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
             # Call the inherited define_node for handling integers
@@ -163,7 +163,7 @@ def define_node(
         import serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 2)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         # Handle int8 (quantized) and int32
         supported_dtypes = [ts.DType.INT8, ts.DType.INT32]
@@ -226,7 +226,7 @@ def define_node(
         import serializer.tosa_serializer as ts  # type: ignore
 
         validate_num_inputs(self.target, inputs, 2)
-        validate_same_dtype(self.target, [*inputs, output])
+        validate_same_dtype(self.target, [*inputs, output], ts)
 
         if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
             # Call the inherited define_node for handling integers
