
Commit 2f48d5a

Merge branch 'main' into android-thinking

2 parents 40f37dc + 3997ae9

File tree

9 files changed (+80 / -14 lines)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -59,6 +59,9 @@
 )

 from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
+from executorch.backends.transforms.decompose_sdpa import (
+    DecomposeScaledDotProductAttention,
+)
 from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
@@ -194,6 +197,7 @@ def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
         )

     def transform_for_annotation_pipeline(self, graph_module: GraphModule):
+        self.add_pass(DecomposeScaledDotProductAttention())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
```
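For context, the new pass splits the fused `aten.scaled_dot_product_attention` op into matmul/softmax primitives before quantization annotation, so each piece can be annotated individually for the TOSA backend. A minimal numerical sketch of that decomposition (not the ExecuTorch pass itself; shapes and tolerances are illustrative):

```python
# Sketch only: what "decomposing SDPA" means numerically, assuming no mask,
# no dropout, and the default 1/sqrt(d_k) scaling.
import math

import torch
import torch.nn.functional as F


def sdpa_decomposed(query, key, value):
    # softmax(Q @ K^T / sqrt(d_k)) @ V as separate matmul/softmax ops
    scale = 1.0 / math.sqrt(query.size(-1))
    scores = torch.matmul(query, key.transpose(-2, -1)) * scale
    return torch.matmul(torch.softmax(scores, dim=-1), value)


q, k, v = (torch.randn(1, 3, 197, 64) for _ in range(3))
torch.testing.assert_close(
    sdpa_decomposed(q, k, v),
    F.scaled_dot_product_attention(q, k, v),
    rtol=1e-3,
    atol=1e-5,
)
```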

backends/arm/_passes/decompose_softmax_pass.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -8,7 +8,11 @@
 from executorch.exir.pass_base import ExportPass

 # For BI case
-torch_softmax = (torch.ops.aten.softmax.int, torch.ops.aten.log_softmax.int)
+torch_softmax = (
+    torch.ops.aten.softmax.int,
+    torch.ops.aten._safe_softmax.default,
+    torch.ops.aten.log_softmax.int,
+)
 # For MI case
 edge_softmax = (
     exir_ops.edge.aten._softmax.default,
```
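`aten._safe_softmax` is the softmax variant that newer PyTorch releases emit when decomposing `scaled_dot_product_attention`; listing it here lets the BI decomposition catch it as well. A small hedged illustration of its one behavioral difference (requires a PyTorch build that ships this op):

```python
# _safe_softmax behaves like regular softmax except on rows that are
# entirely -inf (fully masked), where it returns zeros instead of NaN.
import torch

row = torch.tensor([[-float("inf"), -float("inf"), -float("inf")]])
print(torch.softmax(row, dim=-1))             # tensor([[nan, nan, nan]])
print(torch.ops.aten._safe_softmax(row, -1))  # tensor([[0., 0., 0.]])
```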

backends/arm/test/models/test_conformer.py

Lines changed: 13 additions & 7 deletions

```diff
@@ -83,7 +83,6 @@ def test_conformer_tosa_BI(self):
             )
         )

-    @unittest.expectedFailure  # TODO(MLETORCH-635)
     def test_conformer_u55_BI(self):
         tester = (
             ArmTester(
@@ -97,13 +96,20 @@ def test_conformer_u55_BI(self):
             .to_executorch()
             .serialize()
         )
+
         if conftest.is_option_enabled("corstone_fvp"):
-            tester.run_method_and_compare_outputs(
-                qtol=1.0,
-                rtol=1.0,
-                atol=5.0,
-                inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
-            )
+            try:
+                tester.run_method_and_compare_outputs(
+                    qtol=1.0,
+                    rtol=1.0,
+                    atol=5.0,
+                    inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
+                )
+                self.fail(
+                    "TODO(MLETORCH-635): Expected failure under FVP option, but test passed."
+                )
+            except Exception:
+                pass

     @unittest.expectedFailure  # TODO(MLETORCH-635)
     def test_conformer_u85_BI(self):
```
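This rewrite replaces the blanket `@unittest.expectedFailure` on `test_conformer_u55_BI` with a failure expectation scoped to the FVP path, presumably because the decorator would report an unexpected pass whenever `corstone_fvp` is disabled and the comparison never runs. A self-contained sketch of the pattern, with stand-in names rather than the real harness:

```python
import unittest

OPTION_ENABLED = True  # stand-in for conftest.is_option_enabled("corstone_fvp")


def known_bad_check():
    # Stand-in for run_method_and_compare_outputs(); raises until fixed.
    raise RuntimeError("known issue")


class Example(unittest.TestCase):
    def test_known_issue(self):
        if not OPTION_ENABLED:
            return  # gated path never runs, so the test simply passes
        try:
            known_bad_check()
        except Exception:
            pass  # tolerated: failure is expected until the issue is fixed
        else:
            self.fail("Expected failure, but the check passed.")


if __name__ == "__main__":
    unittest.main()
```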

backends/arm/test/ops/test_sdpa.py

Lines changed: 45 additions & 0 deletions

```diff
@@ -0,0 +1,45 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+
+class SDPA(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, query, key, value):
+        return torch.nn.functional.scaled_dot_product_attention(
+            query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False
+        )
+
+
+input_t = Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
+
+
+def test_sdpa_MI():
+    test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
+    pipeline = TosaPipelineMI[input_t](SDPA(), test_input, [], [])
+    pipeline.pop_stage("check_count.exir")
+    pipeline.run()
+
+
+def test_sdpa_BI():
+    test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
+    pipeline = TosaPipelineBI[input_t](SDPA(), test_input, [], [])
+    pipeline.pop_stage("check.quant_nodes")
+    pipeline.pop_stage("check_count.exir")
+    pipeline.pop_stage(
+        "run_method_and_compare_outputs"
+    )  # TODO: reference is not quantized
+    pipeline.run()
```
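For reference, TOSA MI and BI are the main-inference (float) and base-inference (integer-only) profiles, and the `(1, 3, 197, 64)` input reads like ViT-style attention (196 patch tokens plus a class token across 64-dim heads), though that is only inferred from the shape. A quick eager smoke run of the op under test:

```python
# Eager-mode check of scaled_dot_product_attention with the test's shape:
# the output shape matches the query shape.
import torch
import torch.nn.functional as F

q, k, v = (torch.randn(1, 3, 197, 64) for _ in range(3))
out = F.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=0.0)
print(out.shape)  # torch.Size([1, 3, 197, 64])
```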
2 binary files changed (-5.39 MB and -245 KB); contents not shown.

examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/Constants.swift

Lines changed: 2 additions & 0 deletions

```diff
@@ -25,4 +25,6 @@ You are a helpful assistant.
 """

   public static let llama3PromptTemplate = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>%@<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
+
+  public static let phi4PromptTemplate = "<|user|>%@<|end|><|assistant|>"
 }
```
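To keep all sketches here in one language, here is a Python stand-in for what the Swift `String(format:)` call produces with the new template; the `%@` placeholder receives the user's text (the helper name is hypothetical, not app code):

```python
# Hypothetical helper mirroring String(format: Constants.phi4PromptTemplate, text).
PHI4_PROMPT_TEMPLATE = "<|user|>%@<|end|><|assistant|>"


def format_phi4_prompt(text: str) -> str:
    # Swift's %@ slot takes the whole user message.
    return PHI4_PROMPT_TEMPLATE.replace("%@", text)


print(format_phi4_prompt("What is ExecuTorch?"))
# <|user|>What is ExecuTorch?<|end|><|assistant|>
```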

examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift

Lines changed: 9 additions & 4 deletions

```diff
@@ -86,6 +86,7 @@ struct ContentView: View {
     case llama
     case llava
     case qwen3
+    case phi4

     static func fromPath(_ path: String) -> ModelType {
       let filename = (path as NSString).lastPathComponent.lowercased()
@@ -95,8 +96,10 @@ struct ContentView: View {
         return .llava
       } else if filename.hasPrefix("qwen3") {
         return .qwen3
+      } else if filename.hasPrefix("phi4") {
+        return .phi4
       }
-      print("Unknown model type in path: \(path). Model filename should start with one of: llama, llava, or qwen3")
+      print("Unknown model type in path: \(path). Model filename should start with one of: llama, llava, qwen3, or phi4")
       exit(1)
     }
   }
@@ -343,15 +346,15 @@ struct ContentView: View {
     }

     switch modelType {
-    case .llama, .qwen3:
+    case .llama, .qwen3, .phi4:
       runnerHolder.runner = runnerHolder.runner ?? Runner(modelPath: modelPath, tokenizerPath: tokenizerPath)
     case .llava:
       runnerHolder.llavaRunner = runnerHolder.llavaRunner ?? LLaVARunner(modelPath: modelPath, tokenizerPath: tokenizerPath)
     }

     guard !shouldStopGenerating else { return }
     switch modelType {
-    case .llama, .qwen3:
+    case .llama, .qwen3, .phi4:
       if let runner = runnerHolder.runner, !runner.isLoaded() {
         var error: Error?
         let startLoadTime = Date()
@@ -474,12 +477,14 @@ struct ContentView: View {
       prompt = String(format: Constants.llama3PromptTemplate, text)
     case .llava:
       prompt = String(format: Constants.llama3PromptTemplate, text)
+    case .phi4:
+      prompt = String(format: Constants.phi4PromptTemplate, text)
     }

     try runnerHolder.runner?.generate(prompt, sequenceLength: seq_len) { token in

       if token != prompt {
-        if token == "<|eot_id|>" {
+          if token == "<|eot_id|>" {
         // hack to fix the issue that extension/llm/runner/text_token_generator.h
         // keeps generating after <|eot_id|>
         shouldStopShowingToken = true
```
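The routing change boils down to a filename-prefix lookup. A sketch of the same logic `ModelType.fromPath` implements (`model_type_from_path` is a hypothetical stand-in, not app code):

```python
import os


def model_type_from_path(path: str) -> str:
    # Mirrors fromPath: lowercase the last path component, match a known prefix.
    filename = os.path.basename(path).lower()
    for prefix in ("llama", "llava", "qwen3", "phi4"):
        if filename.startswith(prefix):
            return prefix
    raise ValueError(
        f"Unknown model type in path: {path}. "
        "Model filename should start with one of: llama, llava, qwen3, or phi4"
    )


assert model_type_from_path("/models/phi4-mini.pte") == "phi4"
```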

examples/demo-apps/apple_ios/LLaMA/README.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -32,12 +32,12 @@ Download already exported LLaMA/LLaVA models along with tokenizers from [Hugging
 ```bash
 open examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj
 ```
-
+
 3. Click the Play button to launch the app in the Simulator.

 4. To run on a device, ensure you have it set up for development and a provisioning profile with the `increased-memory-limit` entitlement. Update the app's bundle identifier to match your provisioning profile with the required capability.

-5. After successfully launching the app, copy the exported ExecuTorch model (`.pte`) and tokenizer (`.model`) files to the iLLaMA folder. Three models are currently supported at the moment - Llama, Qwen3, and Llava multimodal. Please ensure that your model `.pte` file starts with `llama`, `qwen3`, or `llava` so that the app selects the correct model type.
+5. After successfully launching the app, copy the exported ExecuTorch model (`.pte`) and tokenizer (`.model`) files to the iLLaMA folder. Four models are currently supported at the moment - Llama, Qwen3, Phi4-mini, and Llava multimodal. Please ensure that your model `.pte` file starts with `llama`, `qwen3`, `phi4` or `llava` so that the app selects the correct model type.

 - **For the Simulator:** Drag and drop both files onto the Simulator window and save them in the `On My iPhone > iLLaMA` folder.
 - **For a Device:** Open a separate Finder window, navigate to the Files tab, drag and drop both files into the iLLaMA folder, and wait for the copying to finish.
````
