Commit 3c9dcc8

Update base for Update on "[Executorch][llm] Enable local global attention in export_llama script"
Added a new option, --local_global_attention, which takes a pattern of window sizes and determines which layers use local sliding-window attention; an entry of 0 leaves that layer on regular global attention. For example, [0, 256, 256, 0, 256, 256] can be used for a 6-layer transformer, or the shorter pattern [0, 256, 256] can be given and repeated across the layers.

Differential Revision: [D73891423](https://our.internmc.facebook.com/intern/diff/D73891423/)

[ghstack-poisoned]
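As a rough sketch of the pattern-repeat semantics (the helper below is hypothetical, not the script's actual code):

```python
# Hypothetical helper illustrating how a local/global attention pattern
# expands across layers. An entry of 0 means global attention; a positive
# entry is the sliding-window size for that layer. This is NOT the actual
# export_llama implementation, just a sketch of the semantics.
def expand_attention_pattern(pattern: list[int], n_layers: int) -> list[int]:
    if n_layers % len(pattern) != 0:
        raise ValueError("pattern length must divide the number of layers")
    return pattern * (n_layers // len(pattern))

# [0, 256, 256] over 6 layers is equivalent to [0, 256, 256, 0, 256, 256]:
assert expand_attention_pattern([0, 256, 256], 6) == [0, 256, 256, 0, 256, 256]
```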
2 parents 12fae71 + cd3b53d commit 3c9dcc8

187 files changed: +6632 additions, −1319 deletions

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ set_up_aot() {
   cmake .. \
     -DCMAKE_INSTALL_PREFIX=$PWD \
     -DEXECUTORCH_BUILD_QNN=ON \
+    -DANDROID_NATIVE_API_LEVEL=30 \
     -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
     -DEXECUTORCH_BUILD_DEVTOOLS=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
@@ -222,7 +222,7 @@ test_model_with_coreml() {

   DTYPE=float16

-  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}"
+  "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}" --use_partitioner
   EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)

   if [ -n "$EXPORTED_MODEL" ]; then

.github/workflows/_link_check.yml

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+on:
+  workflow_call:
+    inputs:
+      ref:
+        type: string
+        required: true
+
+jobs:
+  lint-urls:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        ./scripts/lint_urls.sh $(
+          [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            || [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+        )
+
+  lint-xrefs:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-linter
+      submodules: 'none'
+      fetch-depth: 0
+      ref: ${{ inputs.ref }}
+      timeout: 90
+      script: |
+        ./scripts/lint_xrefs.sh $(
+          [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
+            || [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
+        )

.github/workflows/lint.yml

Lines changed: 2 additions & 21 deletions
@@ -64,29 +64,10 @@ jobs:

           exit $RC

-  lint-urls:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+  link-check:
+    uses: ./.github/workflows/_link_check.yml
     with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_urls.sh
-
-  lint-xrefs:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
-      fetch-depth: 0
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        ./scripts/lint_xrefs.sh

   android-java-format:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/nightly.yml

Lines changed: 6 additions & 0 deletions
@@ -30,3 +30,9 @@ jobs:
       test-infra-ref: main
       updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
       pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
+
+  link-check:
+    needs: update-pytorch-commit-hash
+    uses: ./.github/workflows/_link_check.yml
+    with:
+      ref: ${{ github.sha }}

Package.swift

Lines changed: 3 additions & 1 deletion
@@ -77,7 +77,9 @@ let package = Package(
         name: "\(key)_dependencies",
         dependencies: [.target(name: key)],
         path: ".Package.swift/\(key)",
-        linkerSettings:
+        linkerSettings: [
+          .linkedLibrary("c++")
+        ] +
           (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } +
           (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
       ),

README.md

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ To get started you can:

 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for [Llama](examples/models/llama/README.md) and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)

 ## Feedback and Engagement

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 15 additions & 1 deletion
@@ -88,9 +88,17 @@
         ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
         return std::nullopt;
     }
-
+
     std::vector<ssize_t> strides(tensor.strides().begin(), tensor.strides().end());
     std::vector<size_t> shape(tensor.sizes().begin(), tensor.sizes().end());
+
+    // If tensor is rank 0, wrap in rank 1
+    // See https://github.com/apple/coremltools/blob/8.2/coremltools/converters/mil/frontend/torch/exir_utils.py#L73
+    if (shape.size() == 0) {
+        shape.push_back(1);
+        strides.push_back(1);
+    }
+
     MultiArray::MemoryLayout layout(dataType.value(), std::move(shape), std::move(strides));
     switch (argType) {
         case ArgType::Input: {
@@ -233,6 +241,12 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
     std::array<SizesType, kTensorDimensionLimit> new_shape;
     for (size_t i = nInputs; i < nInputs + nOutputs; i++) {
         Tensor& t = args[i]->toTensor();
+        // If t has rank 0, do not resize. delegate_args[i] will have rank 1
+        // because we resized it in get_multi_array
+        if (t.dim() == 0) {
+            continue;
+        }
+
         int rank = delegate_args[i].layout().rank();
         assert (rank <= new_shape.size());
         for (int d = 0; d < rank; d++) {
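The rank-0 handling above follows the coremltools convention linked in the comment: Core ML MultiArrays cannot be rank 0, so a scalar tensor is presented as a one-element rank-1 array. A small Python sketch of the same normalization (the helper name is hypothetical, not part of the delegate):

```python
import torch

def normalize_rank0(t: torch.Tensor) -> tuple[list[int], list[int]]:
    # Hypothetical stand-in for the delegate's shape/stride fix-up:
    # rank-0 (scalar) tensors are wrapped as rank-1 with one element.
    shape, strides = list(t.shape), list(t.stride())
    if len(shape) == 0:
        shape, strides = [1], [1]
    return shape, strides

print(normalize_rank0(torch.tensor(3.0)))   # ([1], [1])
print(normalize_rank0(torch.zeros(2, 3)))   # ([2, 3], [3, 1])
```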

backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm

Lines changed: 77 additions & 1 deletion
@@ -15,6 +15,9 @@
 #import <XCTest/XCTest.h>
 #import <executorch/runtime/platform/runtime.h>
 #import <model_logging_options.h>
+#import <multiarray.h>
+
+using namespace executorchcoreml;

 @interface ETCoreMLModelManagerTests : XCTestCase

@@ -110,7 +113,7 @@ - (void)testAddModelExecution {
     XCTAssertNotNil(inputs);
     MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
     NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
-    XCTAssertTrue([self.modelManager executeModelWithHandle:handle 
+    XCTAssertTrue([self.modelManager executeModelWithHandle:handle
                                                        args:args
                                              loggingOptions:executorchcoreml::ModelLoggingOptions()
                                                 eventLogger:nullptr
@@ -148,4 +151,77 @@ - (void)testMulModelExecution {
     }
 }

+// See https://github.com/pytorch/executorch/pull/10465
+- (void)testAutoreleasepoolError {
+    NSURL *modelURL = [self.class bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSError *localError = nil;
+    XCTAssertNotNil(modelURL);
+
+    NSData *modelData = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+    ModelHandle *modelHandle = [self.modelManager loadModelFromAOTData:modelData
+                                                         configuration:configuration
+                                                                 error:&localError];
+    XCTAssert(modelHandle);
+
+    ETCoreMLModel *model = [self.modelManager modelWithHandle:modelHandle];
+    XCTAssert(model);
+
+    NSArray<MLMultiArray *> *inputArrays =
+        [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(2), @(3)] error:&localError];
+    XCTAssert(inputArrays);
+
+    std::vector<MultiArray> multiArrays;
+    multiArrays.reserve(inputArrays.count + model.orderedOutputNames.count);
+    for (MLMultiArray *array in inputArrays) {
+        auto dataTypeOpt = to_multiarray_data_type(array.dataType);
+        XCTAssert(dataTypeOpt.has_value());
+        auto dataType = dataTypeOpt.value();
+
+        std::vector<size_t> dims;
+        for (NSNumber *n in array.shape) {
+            dims.push_back(n.unsignedLongValue);
+        }
+
+        std::vector<ssize_t> strides(dims.size());
+        ssize_t currentStride = 1;
+        for (NSInteger i = dims.size() - 1; i >= 0; --i) {
+            strides[i] = currentStride;
+            currentStride *= dims[i];
+        }
+
+        multiArrays.emplace_back(array.dataPointer,
+                                 MultiArray::MemoryLayout(dataType, dims, strides));
+    }
+
+    auto inputLayout = multiArrays[0].layout();
+    size_t bufferSize = inputLayout.num_bytes();
+    for (NSUInteger i = 0; i < model.orderedOutputNames.count; ++i) {
+        multiArrays.emplace_back(calloc(1, bufferSize), inputLayout);
+    }
+    // corrupt first input shape to force error
+    {
+        auto originalLayout = multiArrays[0].layout();
+        auto corruptedDims = originalLayout.shape();
+        corruptedDims[0] += 1;
+        multiArrays[0] = MultiArray(multiArrays[0].data(),
+                                    MultiArray::MemoryLayout(originalLayout.dataType(),
+                                                             corruptedDims,
+                                                             originalLayout.strides()));
+    }
+
+    BOOL success = [self.modelManager executeModelWithHandle:modelHandle
+                                                     argsVec:multiArrays
+                                              loggingOptions:ModelLoggingOptions()
+                                                 eventLogger:nullptr
+                                                       error:&localError];
+    XCTAssertFalse(success);
+    XCTAssertNotNil(localError);
+
+    for (size_t i = inputArrays.count; i < multiArrays.size(); ++i) {
+        free(multiArrays[i].data());
+    }
+}
+
 @end

backends/apple/coreml/scripts/install_requirements.sh

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ SCRIPT_DIR_PATH="$(

 # TODO(jathu): remove the need to fetch coremltools to build deps for coreml_executor_runner.
 # Keep this version in sync with: pyproject.toml
-COREMLTOOLS_VERSION="8.2"
+COREMLTOOLS_VERSION="8.3"

 red=`tput setaf 1`
 green=`tput setaf 2`

backends/apple/mps/setup.md

Lines changed: 3 additions & 3 deletions
@@ -76,12 +76,12 @@ cd executorch
 ## Run the mv3 generated model using the mps_executor_runner

 ```bash
-./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program
+./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_float16_bundled.pte --bundled_program
 ```

 - You should see the following results. Note that no output file will be generated in this example:
 ```
-I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_bundled_fp16.pte is loaded.
+I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_float16_bundled.pte is loaded.
 I 00:00:00.003306 executorch:mps_executor_runner.mm:292] Program methods: 1
 I 00:00:00.003308 executorch:mps_executor_runner.mm:294] Running method forward
 I 00:00:00.003311 executorch:mps_executor_runner.mm:349] Setting up non-const buffer 1, size 606112.
@@ -118,7 +118,7 @@ python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_
 ```
 2. Run your Program on the ExecuTorch runtime and generate an [ETDump](../../../docs/source/etdump.md).
 ```
-./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs
+./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_float16_bundled.pte --bundled_program --dump-outputs
 ```
 3. Create an instance of the Inspector API by passing in the ETDump you have sourced from the runtime along with the optionally generated ETRecord from step 1.
 ```bash

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
 )
 from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass  # noqa
 from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass  # noqa
+from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass  # noqa
 from .fuse_quantized_activation_pass import FuseQuantizedActivationPass  # noqa
 from .insert_rescales_pass import InsertRescalePass  # noqa
 from .insert_table_ops import InsertTableOpsPass  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 7 additions & 0 deletions
@@ -40,6 +40,7 @@
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
+    FuseEqualPlaceholdersPass,
     FuseQuantizedActivationPass,
     InsertRescalePass,
     InsertTableOpsPass,
@@ -58,6 +59,9 @@
 )

 from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
+from executorch.backends.transforms.decompose_sdpa import (
+    DecomposeScaledDotProductAttention,
+)
 from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
@@ -113,6 +117,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseConstantArgsPass(exported_program))

         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())

@@ -164,6 +169,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())

@@ -191,6 +197,7 @@ def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
         )

     def transform_for_annotation_pipeline(self, graph_module: GraphModule):
+        self.add_pass(DecomposeScaledDotProductAttention())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())

backends/arm/_passes/decompose_softmax_pass.py

Lines changed: 5 additions & 1 deletion
@@ -8,7 +8,11 @@
 from executorch.exir.pass_base import ExportPass

 # For BI case
-torch_softmax = (torch.ops.aten.softmax.int, torch.ops.aten.log_softmax.int)
+torch_softmax = (
+    torch.ops.aten.softmax.int,
+    torch.ops.aten._safe_softmax.default,
+    torch.ops.aten.log_softmax.int,
+)
 # For MI case
 edge_softmax = (
     exir_ops.edge.aten._softmax.default,
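The tuples here act as dispatch tables: the pass matches nodes whose operator target appears in torch_softmax (BI case) or edge_softmax (MI case) and rewrites them into primitive ops. Below is a minimal, hypothetical sketch of that shape using the standard ExportPass call_operator hook; it is not this file's actual implementation, and it omits the log_softmax variant and numerical stabilization (subtracting the per-row max):

```python
# Illustrative only: decompose softmax(x, dim) into exp/sum/reciprocal/mul,
# gated on the op tuple above. Real pass logic is more involved.
import torch
from executorch.exir.pass_base import ExportPass

class SketchDecomposeSoftmaxPass(ExportPass):
    def call_operator(self, op, args, kwargs, meta):
        if op not in (torch.ops.aten.softmax.int,
                      torch.ops.aten._safe_softmax.default):
            # Unrelated ops (and log_softmax, which would also need a
            # trailing aten.log) pass through unchanged in this sketch.
            return super().call_operator(op, args, kwargs, meta)
        x, dims = args[0], [args[1]]
        # softmax(x) = exp(x) * reciprocal(sum(exp(x), dim, keepdim=True))
        exp = super().call_operator(torch.ops.aten.exp.default, (x,), {}, meta)
        total = super().call_operator(
            torch.ops.aten.sum.dim_IntList, (exp, dims, True), {}, meta
        )
        recip = super().call_operator(
            torch.ops.aten.reciprocal.default, (total,), {}, meta
        )
        return super().call_operator(
            torch.ops.aten.mul.Tensor, (exp, recip), {}, meta
        )
```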
