Skip to content

[llava][18/N] Move token generation loop to a class #4652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1,328 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
1328 commits
Select commit Hold shift + click to select a range
ef640bf
Provide kernels with true reference implementations for quantized ops…
mcremon-meta Jul 16, 2024
242f2c0
Delete deprecated non_const_buffer methods from Program (#4272)
dbort Jul 17, 2024
740a0a5
Update serializer to sync with export serializer (#4264)
tarun292 Jul 17, 2024
0cde6b8
Move tokenizer into extension/llm/tokenizer (#4278)
helunwencser Jul 17, 2024
2b54194
Add homebrew package for libopqs (#142)
bigfootjon Jul 17, 2024
037cfcf
Disable uploading to S3 and test on-device (#4287)
guangy10 Jul 17, 2024
8950d90
Use external_deps for sentencepiece (#4269)
Jul 17, 2024
b448254
Update Xcode project paths after tokenizer source code move. (#4290)
shoumikhin Jul 17, 2024
c3357e1
use _preserve_ops for to_edge_transform_and_lower (#4273)
mcr229 Jul 17, 2024
1b0bf1c
Qualcomm AI Engine Direct - enable loading context binary directly (#…
haowhsu-quic Jul 17, 2024
4a88318
Intermediate output logging enablement - Inspector logging (#4293)
Olivia-liu Jul 18, 2024
544462d
Add HF RoPE into llama_transformer (#4256)
larryliu0820 Jul 18, 2024
c72190a
Add Buffer Memory Bandwidth profiler (#4262)
Jul 18, 2024
a4decca
Add UBO Read Bandwidth profiler (#4270)
Jul 18, 2024
e5687a4
Add Shared Memory Bandwidth Profiler (#4277)
Jul 18, 2024
1d7d71d
Allow expression of scalar tensor buffers, non string values in varia…
SS-JIA Jul 18, 2024
92b87e4
Fixed the command (#4296)
tusharxoxoxo Jul 18, 2024
282e7fe
Refactor android demo job (#4288)
guangy10 Jul 18, 2024
8e0f856
Remove Buck2 from examples/sdk/README.md (#4299)
kirklandsign Jul 18, 2024
71bb565
Apply late review feedback for D58874164 (enable parallel prefill) (#…
swolchok Jul 18, 2024
c757499
Upload android artifacts to s3 (#4300)
Jul 18, 2024
1933dae
Add Llava model definition (#4259)
larryliu0820 Jul 18, 2024
0333390
Add export_llava.py (#4295)
larryliu0820 Jul 18, 2024
9a4823b
Migrate all DataLoader::Load callsites (#4291)
jackzhxng Jul 18, 2024
8b43bf5
Fix crash when tokenizer files are not found (#4266)
kirklandsign Jul 19, 2024
ba052a4
Add a step to build Java extension code in LLAMA docs (#4265)
kirklandsign Jul 19, 2024
7e417f4
Add Warp Size metric (#4298)
Jul 19, 2024
5865a57
Update Hugging Face packages to latest versions (#4306)
Jul 19, 2024
9d85965
update the pinned pytorch hash (#4313)
pytorchupdatebot Jul 19, 2024
6dbb4dc
Fix sdpa flash attention op for et llama deployment (#4322)
kimishpatel Jul 22, 2024
844a69f
Simplify tokenizer header (#4283)
helunwencser Jul 22, 2024
f0364e8
Fix quantized_matmul with 4D inputs (#4335)
mcremon-meta Jul 22, 2024
0e2b205
Qualcomm AI Engine Direct - add program validation (#4297)
haowhsu-quic Jul 22, 2024
0e032c5
Add support for unbacked symints (#4326)
lucylq Jul 23, 2024
8bdafb0
Add missing quantized_matmul meta kernel (#4343)
mcremon-meta Jul 23, 2024
b7fb9bd
Directly load program in sdk_example_runner (#4352)
jackzhxng Jul 23, 2024
908b5a5
Update xnnpack (#4340)
digantdesai Jul 23, 2024
6556991
Add full op for Arm backend (#4073)
Erik-Lundell Jul 23, 2024
5d58203
Disable sdpa_with_kv_cache for now (#4319)
larryliu0820 Jul 23, 2024
d288076
add dim order into executorch concept documentation (#4349)
Gasoonjia Jul 23, 2024
74b0e89
Add GQA test for sdpa_with_kv_cache large seq length (#4324)
kimishpatel Jul 23, 2024
3269e61
Add more tests to sdpa_with_kv_cache for speculative decode (#4325)
kimishpatel Jul 23, 2024
f0ebfa2
Add Warp Size metric for alternative SM workload distribution (#4305)
Jul 23, 2024
09cfc92
Add sigmoid operator to Arm backend (#4114)
Erik-Lundell Jul 23, 2024
6153b1b
Update ExportedProgram ctor callsites. (#4347)
zhxchen17 Jul 23, 2024
dbc73a6
update qnn doc (#4356)
cccclai Jul 23, 2024
78df332
Android update prebuilt library link (#4362)
kirklandsign Jul 23, 2024
ae1f098
Re-enable CI with smaller model for validation (#4304)
kirklandsign Jul 23, 2024
93c56cb
Prepare for merging _export/exported_program.py and export/exported_p…
yushangdi Jul 23, 2024
628b280
update phi-3-mini readme doc (#4377)
helunwencser Jul 23, 2024
3154afc
Update llama docs on main (#4361)
lucylq Jul 23, 2024
48da61a
Enable aten.relu_.default in the CadenceQuantizer (#4344)
mcremon-meta Jul 23, 2024
fd2dccf
Fix log.cpp when compiled with `executorch.enable_et_log=false` (#4358)
tarun292 Jul 23, 2024
47d309a
Introduce periodic.yml (#4348)
kirklandsign Jul 24, 2024
11b2fcb
Add sub operator for Arm backend (#4074)
Erik-Lundell Jul 24, 2024
6c69ebd
Support llama3.1 (#4376)
larryliu0820 Jul 24, 2024
56120f9
Qualcomm AI Engine Direct - Refactor & centralize common keywords (#4…
chuntl Jul 24, 2024
69dcfe0
Set FLATBUFFERS_MAX_ALIGNMENT=1024 (#4215)
dbort Jul 24, 2024
f6bad56
Update Apple runtime doc with a new version. (#4398)
shoumikhin Jul 24, 2024
944e1e8
Add "schedule" option in gather_test_models.py (#4397)
kirklandsign Jul 24, 2024
99623be
Sort .gitmodules (#4389)
shoumikhin Jul 24, 2024
d8866e3
Move sentencepiece to extension/llm/third-party (#4388)
shoumikhin Jul 24, 2024
5e8cce1
remove target_sdk_version attributes (#4394)
rmaz Jul 24, 2024
d6d691e
Add llama3.1 to readme (#4378)
lucylq Jul 24, 2024
85d4d12
Prepare the script to run tests on Android emulator (#4387)
huydhn Jul 25, 2024
5ad00e7
Update readme to use the latest release. (#4406)
shoumikhin Jul 25, 2024
dbf7d6e
Build and link against sentencepiece 3rd party lib. (#4410)
shoumikhin Jul 25, 2024
5b0700b
some bug fixes (#4409)
mcr229 Jul 25, 2024
dbf87b0
add phi-3-mini eager mode example (#4315)
helunwencser Jul 25, 2024
77c905d
Define custom op for grid points generator of single level feature ma…
Jul 25, 2024
889e5cb
Enable SPIR-V compiler optimization (#4402)
SS-JIA Jul 25, 2024
faeeca8
remove unused tensors from VK model's graph (#4427)
copyrightly Jul 26, 2024
11407f0
immutable accessors in graph signature (#4428)
avikchaudhuri Jul 26, 2024
5d3ec13
Revert D60253955: immutable accessors in graph signature
atalman Jul 26, 2024
9129892
immutable accessors in graph signature (#4433)
avikchaudhuri Jul 26, 2024
5a20a49
Fix numpy and pandas versions. (#4430)
shoumikhin Jul 26, 2024
1e4603d
FileDataLoader fails to read the file when size > INT32_MAX (#4435)
cymbalrush Jul 29, 2024
dd88708
Hoist numel out of loop condtion in op_embedding (#4146)
swolchok Jul 29, 2024
e087ac8
Qualcomm AI Engine Direct - Fix UT example script hang when exception…
winskuo-quic Jul 29, 2024
f695f8e
Support qmatmul with different dims tensors (#4438)
mcremon-meta Jul 29, 2024
e6684f7
Use linux.24xlarge for llava test (#4446)
huydhn Jul 29, 2024
711ecec
fix zero arg export in training_ir and constant tensor handling (#4382)
ydwu4 Jul 30, 2024
7f6a341
Remove redundant generate_*_compile_spec funcs (#3869)
robell Jul 30, 2024
da24d18
Add slice op to Arm backend (#4072)
Erik-Lundell Jul 30, 2024
318a178
Delete hooks.h (#4448)
dbort Jul 30, 2024
db1c4d8
Add an option to turn on/off sdpa_with_kv_cache (#4444)
larryliu0820 Jul 30, 2024
1e14333
Add exportable baby llama example (#4345)
mcremon-meta Jul 30, 2024
3d5a149
Add FVP tests for linear op (#4393)
Erik-Lundell Jul 30, 2024
c659b9c
Add flakyness mark to conv BI test (#4390)
Erik-Lundell Jul 30, 2024
38724d0
Add test debug features (#4144)
Erik-Lundell Jul 30, 2024
3c25aec
Add docstrings to all unittest.TestCase:s (#4391)
Erik-Lundell Jul 30, 2024
28cfabb
Fix use_sdpa_with_kv_cache option (#4456)
larryliu0820 Jul 30, 2024
b7c8378
nop validation during build (#4449)
blackm00n Jul 30, 2024
da7ca6f
Fix build error (#4464)
SS-JIA Jul 30, 2024
ea0c017
Add 3D Texture Bandwidth metric (#4336)
Jul 30, 2024
298b625
Add config file support for constants and test control (#4337)
Jul 30, 2024
5867129
Add metric for 3D texture max concurrent cache read (#4421)
Jul 30, 2024
e03181d
Refactor and class split (#4432)
Jul 30, 2024
1727aa1
fix eval llama (#4469)
cccclai Jul 30, 2024
1ec3444
Migrate sampler to extension/llm (#4460)
larryliu0820 Jul 30, 2024
69f3f1c
Fix prewarming (#4454)
cymbalrush Jul 30, 2024
9aeceee
Implement grid_priors op (#4440)
Jul 30, 2024
a567abf
Porting over ET MultiModal Demo App (#4455)
cmodi-meta Jul 30, 2024
2f8ecf3
Program.fbs change to support serialized mutable state (#4216)
JacobSzwejbka Jul 30, 2024
2852bda
move delegate debug tools to sdk (#4459)
cccclai Jul 30, 2024
227b49d
Fix unsupported linker flag on Mac (#4473)
Jul 31, 2024
6cd7f38
Move llm custom ops to extension (#4467)
lucylq Jul 31, 2024
dcdd254
Non-GenAI models coverage XNNPACK (#4474)
guangy10 Jul 31, 2024
febd9c1
Change deprecated impl_abstract to register_fake (#4392)
Erik-Lundell Jul 31, 2024
6bfefa8
Use Core ML Quantizer in Llama Export (#4458)
Jul 31, 2024
5890a9c
Allow us to move the proto files outside of fbcode (#4417)
Jul 31, 2024
f9d2de1
Create a buck genrule for schema_generated.h
Jul 31, 2024
f611219
Add workflow for on-demand benchmarking (#4441)
guangy10 Jul 31, 2024
1114539
Change warning to a different log level (#4482)
cccclai Aug 1, 2024
5b37524
Add customized static cache implementation (#4490)
helunwencser Aug 1, 2024
a65700c
add a wrapper for running phi-3-mini with kv cache (#4491)
helunwencser Aug 1, 2024
a743a3b
export phi-3-mini-wrapper (#4478)
helunwencser Aug 1, 2024
ad371a4
Move calculations away from GPU in Bandwidth profilers (#4445)
Aug 1, 2024
1882837
Fixed typo in default devices (#4483)
guangy10 Aug 1, 2024
0bbcabe
Qualcomm AI Engine Direct - Add index and index_put op (#4481)
shewu-quic Aug 1, 2024
34c3c3d
make rust-project deps oss compatible (#4506)
Lev1ty Aug 1, 2024
6fce77f
example of updating dim order for specific part of graph (#4404)
Gasoonjia Aug 1, 2024
c1d53ba
Fix CI OOM issue (#4507)
guangy10 Aug 1, 2024
d207eb0
move get tokenizer to export_llama_lib (#4451)
cccclai Aug 1, 2024
64b7733
update sampler reference (#4508)
lucylq Aug 1, 2024
4ba11e3
Register grid_priors nn.Module test (#4489)
Aug 1, 2024
301a017
Add weights to model outputs (#4302)
JacobSzwejbka Aug 1, 2024
aa56e8c
Split quantize_pt2 to allow calling the same APIs in testing and regu…
mcremon-meta Aug 1, 2024
c329d6a
BC Deprecate XN00 Support (#4450)
mcr229 Aug 1, 2024
d59419c
Rename optimizer buck target to be more specific (#4509)
JacobSzwejbka Aug 1, 2024
1090bcd
Qualcomm AI Engine Direct - Enable HTP emulator test in x86 host (#4503)
chuntl Aug 2, 2024
4483bb6
Add Wav2Vec2 base model (#4513)
mcremon-meta Aug 2, 2024
448c7d3
Support int8 texture tensors without requiring int8 buffers (#4485)
SS-JIA Aug 2, 2024
0caaf3f
Support devices and delegates parameters in android-perf workflow (#4…
guangy10 Aug 2, 2024
1b6d5bb
fix forward _partition_and_lower_one_graph_module
avikchaudhuri Aug 2, 2024
15815dd
Make flatcc cross-compile deterministic (#4312)
haowhsu-quic Aug 2, 2024
cd529cd
Add dim-order op revert pass for delegates (#4470)
digantdesai Aug 2, 2024
1647622
Revert dim_order ops by default (#4518)
digantdesai Aug 2, 2024
ee8359c
Update bilinear test to handle dim_order (#4520)
digantdesai Aug 2, 2024
7a03452
Move runner stats into its own header (#4499)
larryliu0820 Aug 2, 2024
0590ed1
Add test training model (#4511)
JacobSzwejbka Aug 2, 2024
20c86ca
Move test delegate to new AoT flow (#4522)
JacobSzwejbka Aug 2, 2024
738842d
Type Promote Div (#4516)
mcr229 Aug 2, 2024
fbc183f
Dont generate a mutable segment if you ahve no mutable data (#4523)
JacobSzwejbka Aug 2, 2024
76f0b61
Fix Stats in jni_layer_llama.cpp (#4527)
larryliu0820 Aug 2, 2024
7cd96f7
Revert "Make flatcc cross-compile deterministic (#4312)" (#4528)
dbort Aug 2, 2024
9b06921
Make quantized relu more flexible with quant params and use nnlib ker…
mcremon-meta Aug 3, 2024
14c2473
Improve prefill speed (#4531)
helunwencser Aug 5, 2024
864e0b0
Implement runner for phi-3-mini (#4500)
helunwencser Aug 5, 2024
e1cb7bf
configeration based partitioner (#4369)
mcr229 Aug 5, 2024
ded57a7
XNNPartitionerConfig + Config-Based XNNPACKPartitioner (#4370)
mcr229 Aug 5, 2024
4826cb4
Add to_edge_transform_and_lower stage (#4375)
mcr229 Aug 5, 2024
1a68779
refactoring some quant utils (#4374)
mcr229 Aug 5, 2024
ef4f992
Add GEMM Config to partition Linear, QLinear, DQLinear, Addmm (#4373)
mcr229 Aug 5, 2024
8b0e7fb
Add, Relu, Hardtanh configs (#4367)
mcr229 Aug 5, 2024
eb8bee0
Conv2d + BatchNorm (#4372)
mcr229 Aug 5, 2024
5ebd62c
Abs and AvgPool (#4366)
mcr229 Aug 5, 2024
3a62cb2
Cat, Ceil, Clamp (#4368)
mcr229 Aug 5, 2024
4217b9e
Support Conv1d (#4532)
mcr229 Aug 5, 2024
a0d63cc
Div, Mul, Elu Partitioner Configs (#4539)
mcr229 Aug 5, 2024
d88f368
Permute, Softmax, Sigmoid Configs (#4537)
mcr229 Aug 5, 2024
0f5794e
Fix doxygen comment (#4457)
jackzhxng Aug 5, 2024
6aaab87
Suppress -Wglobal-constructors for llama example main (#4547)
swolchok Aug 5, 2024
5636c9c
Fix overload name for prims ops used in `call_map` nodes. (#4465)
hsharma35 Aug 5, 2024
a86204c
Bugfixes to enable call_map. (#4471)
hsharma35 Aug 5, 2024
ec4f576
Max Configs (#4540)
mcr229 Aug 6, 2024
de300e0
Add README in extension/llm (#4544)
iseeyuan Aug 6, 2024
60699fb
Adding mode to enable memory offsets in Executorch trace profiler (#4…
Aug 6, 2024
fadc076
Filter aliasing nodes from memory timeline (#4468)
Aug 6, 2024
f52d8ab
Handle multiple memory IDs using pid (#2974)
Aug 6, 2024
20cb298
Move metadata util to a separate header for reuse (#4550)
larryliu0820 Aug 6, 2024
9d9cda0
More robust DCE pass (#4565)
yushangdi Aug 6, 2024
05a7d52
Back out "BC Deprecate XN00 Support" (#4573)
kwanghoon-meta Aug 7, 2024
bf477e4
Fix wheel build and smoke test (#4429)
kirklandsign Aug 7, 2024
a50d032
update the pinned pytorch hash (#4321)
pytorchupdatebot Aug 7, 2024
34e4870
update readme for phi-3-mini (#4551)
helunwencser Aug 7, 2024
3a5426d
Add module load option in Java/JNI (#4578)
kirklandsign Aug 7, 2024
3816c91
Remove inline Tensor decl from tensor_util.h (#4577)
dbort Aug 7, 2024
ad3c6f4
don't deep copy program (#4575)
mcr229 Aug 7, 2024
76c85e1
llava use to_edge_transform (#4580)
mcr229 Aug 7, 2024
1b484fd
Prep TensorParser for Loading Mutable Segments (#4545)
JacobSzwejbka Aug 7, 2024
92edd04
Revert "update the pinned pytorch hash (#4321)" (#4583)
kirklandsign Aug 7, 2024
530d4a1
Upsample Bilinear 2d
mcr229 Aug 8, 2024
8c813f9
Simplify prompt fields logic and added modelType selector
Riandy Aug 8, 2024
e610947
update encoder latency
cccclai Aug 8, 2024
b4a0b44
Floor, Hardswish, LeakyRelu configs
mcr229 Aug 8, 2024
62f90c6
Mean, Min, Neg Configs
mcr229 Aug 8, 2024
6607c7d
Prelu, Pow, Slice
mcr229 Aug 8, 2024
f863fe4
Sub, Sqrt, Pad
mcr229 Aug 8, 2024
5c27045
Test new partitioner on all models
mcr229 Aug 8, 2024
9a4f32d
Qualcomm AI Engine Direct - Add source transform for kv cache and sdpa
shewu-quic Aug 8, 2024
f19f9d9
Move exir.delegate to PyTorch core to enforce no out-of-tree HOPs
ezyang Aug 8, 2024
91252e7
Qualcomm AI Engine Direct - Add configurable job number for build script
chuntl Aug 8, 2024
2718dd4
cmake flatcc build issue
haowhsu-quic Aug 8, 2024
ff317c0
Expand dataloader interface.
JacobSzwejbka Aug 8, 2024
8ba7d03
Remove unused shim (#4597)
kirklandsign Aug 8, 2024
b671e24
add empty shim/TARGETS (#4600)
kirklandsign Aug 8, 2024
d3367e6
Introduce preprocess custom ops
lucylq Aug 8, 2024
d04c9d6
HiFi4 NNLib added as a third party lib for Cadence DSP backend compil…
cad-audio Aug 8, 2024
2405f74
Remove Proxy from exported programs and modules
yushangdi Aug 8, 2024
192d463
Skip computing capacity for dynamic tensor.
shoumikhin Aug 9, 2024
1b092e9
fix CI errors
eddieluo72 Aug 9, 2024
3b21e79
Buckify open sourced ET-QNN
derekxu Aug 9, 2024
05f13fc
Update on Mac runner build
iseeyuan Aug 9, 2024
5be55e5
[ET-VK][Ez] Improve vec class (#4635)
kirklandsign Aug 9, 2024
4e57f9c
[coreml] Remove references to build_apple_frameworks.sh "--Release" flag
dbort Aug 9, 2024
0b1695f
[ET-VK][Ez] Allow ParamsBindList to append a single Binding Info (#4636)
kirklandsign Aug 9, 2024
e4897dd
[ET-VK] Do not apply zero padding for buffer backed tensors (#4637)
kirklandsign Aug 9, 2024
3f9b39e
Qualcomm AI Engine Direct -- update documents
chiwwang Aug 9, 2024
bbabd28
Implement tile_crop custom op (#4622) (#4622)
manuelcandales Aug 9, 2024
c04bc99
[MPS] Add support for flatbuffer serialization > 4GB
DenisVieriu97 Aug 9, 2024
82608bf
Gh/larryliu0820/46/base (#4643)
larryliu0820 Aug 9, 2024
83a32af
Improve ET llama runner logging and debuggability
derekxu Aug 9, 2024
d3a7c71
[QNN] fix linter (#4645)
kirklandsign Aug 9, 2024
37c4f97
fixed quantized_matmul_out
zonglinpengmeta Aug 9, 2024
5127805
[llava][18/N] Move token generation loop to a class
larryliu0820 Aug 9, 2024
c5a816e
fixed quantized_layer_norm
zonglinpengmeta Aug 9, 2024
79c15ef
Fix llava model definition for export
larryliu0820 Aug 9, 2024
a5c1bb9
Small refactoring of TensorImpl. (#4640)
shoumikhin Aug 9, 2024
bcbdfa8
Add core.pyi to resolve pyre issue when importing from executorch.ext…
LeeOHzzZ Aug 9, 2024
d9cfd6a
add buck targets build coverage to kernels operators and codegen
zonglinpengmeta Aug 9, 2024
f7684ad
Fix bundled program and plan_execute in pybindings
dulinriley Aug 9, 2024
e2ca877
[executorch] Avoid division in Sampler::sample (#4656)
kirklandsign Aug 10, 2024
98b8ae1
Statically Quantize Image Encoder
mcr229 Aug 10, 2024
593da70
Skip checking for dim order and strindes if those are not provided ex…
shoumikhin Aug 10, 2024
a70d070
add fp32 bmm op
mcr229 Aug 10, 2024
7f34796
Qualcomm AI Engine Direct - fix release build issue
haowhsu-quic Aug 10, 2024
c9e7714
improve sampling time
cccclai Aug 10, 2024
ce7f5a0
[llama] Fix text prefiller
larryliu0820 Aug 11, 2024
18b829c
Replace custom op pad with aten op, post-export
lucylq Aug 11, 2024
99e1ae1
Skip storing unnecessary metadata in ManagedTensor.
shoumikhin Aug 11, 2024
fd0ae19
Update base for Update on "[llava][18/N] Move token generation loop t…
larryliu0820 Aug 12, 2024
2709f63
Update on "[llava][18/N] Move token generation loop to a class"
larryliu0820 Aug 12, 2024
e800626
Qualcomm AI Engine Direct - fix conv2d to meet QNN constraint
haowhsu-quic Aug 12, 2024
d53f8fa
Not hardcode llama2 model in perf test
huydhn Aug 12, 2024
9b2bfb6
Update phi3 lora example documentation
jackzhxng Aug 12, 2024
0c26dc0
[Cadence] Enabled x86 executor flow with numerical verification
zonglinpengmeta Aug 12, 2024
440048c
Add an activity for benchmarking only
kirklandsign Aug 12, 2024
8f46971
allow models to use customized token ids during export
helunwencser Aug 12, 2024
728a29d
Pack buffer-backed tensors correctly when moving into and out of staging
SS-JIA Aug 12, 2024
3e0eb0f
Do not print eos (#4654)
helunwencser Aug 12, 2024
b165c28
Implement load_into for file data loader
JacobSzwejbka Aug 12, 2024
b6de6ed
Fix periodic run and model name for benchmarking
guangy10 Aug 12, 2024
5e9bab8
Delete dead code
kirklandsign Aug 13, 2024
56f843b
Move metadata util to extension/llm/runner.
shoumikhin Aug 13, 2024
e71fa03
Add stories ci for qnn
cccclai Aug 13, 2024
eb28253
Update base for Update on "[llava][18/N] Move token generation loop t…
larryliu0820 Aug 13, 2024
be01c74
Update on "[llava][18/N] Move token generation loop to a class"
larryliu0820 Aug 13, 2024
4a2dbf8
Update base for Update on "[llava][18/N] Move token generation loop t…
larryliu0820 Aug 13, 2024
0bda164
Update on "[llava][18/N] Move token generation loop to a class"
larryliu0820 Aug 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
10 changes: 10 additions & 0 deletions .buckconfig
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,13 @@

[parser]
target_platform_detector_spec = target:root//...->prelude//platforms:default target:shim//...->prelude//platforms:default

# Limit the number of files that the buck daemon needs to monitor. If every
# submodule is cloned recursively, some systems can fail to build with "OS file
# watch limit reached".
[project]
ignore = \
.git, \
**/.git, \
cmake-out, \
pip-out
8 changes: 8 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ case "${IMAGE_NAME}" in
ARM_SDK=yes
CLANG_VERSION=12
;;
executorch-ubuntu-22.04-qnn-sdk)
QNN_SDK=yes
CLANG_VERSION=12
;;
executorch-ubuntu-22.04-clang12-android)
LINTRUNNER=""
CLANG_VERSION=12
Expand All @@ -59,6 +63,9 @@ cp ../../requirements-lintrunner.txt ./
# with a new image hash when the content here is updated
cp -r ../../examples/arm/ ./arm

# Copy qnn setup script from root to here
cp -r ../../backends/qualcomm/ ./qualcomm

docker build \
--no-cache \
--progress=plain \
Expand All @@ -72,6 +79,7 @@ docker build \
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
--build-arg "ARM_SDK=${ARM_SDK:-}" \
--build-arg "QNN_SDK=${QNN_SDK:-}" \
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
-f "${OS}"/Dockerfile \
"$@" \
Expand Down
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/buck2.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2024-02-15
2024-05-15
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
a52b4e22571507abc35c2d47de138497190d2e0a
b556d31586845fb1e296a975d2b85d9d325205c9
33 changes: 22 additions & 11 deletions .ci/docker/common/install_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,33 @@ set -ex
[ -n "${ANDROID_NDK_VERSION}" ]

install_prerequiresites() {
apt-get update
OS=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$OS" in
amzn)
# https://docs.aws.amazon.com/corretto/latest/corretto-17-ug/amazon-linux-install.html
yum install -y java-17-amazon-corretto \
ca-certificates \
ant
;;
*)
apt-get update

# NB: Need OpenJDK 17 at the minimum
apt-get install -y --no-install-recommends \
openjdk-17-jdk \
ca-certificates-java \
ant
# NB: Need OpenJDK 17 at the minimum
apt-get install -y --no-install-recommends \
openjdk-17-jdk \
ca-certificates-java \
ant

# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
;;
esac
}

install_ndk() {
NDK_INSTALLATION_DIR=/opt/ndk
mkdir -p "${NDK_INSTALLATION_DIR}"
rm -rf "${NDK_INSTALLATION_DIR}" && mkdir -p "${NDK_INSTALLATION_DIR}"

pushd /tmp
# The NDK installation is cached on ossci-android S3 bucket
Expand Down Expand Up @@ -54,7 +65,7 @@ install_cmdtools() {

install_sdk() {
SDK_INSTALLATION_DIR=/opt/android/sdk
mkdir -p "${SDK_INSTALLATION_DIR}"
rm -rf "${SDK_INSTALLATION_DIR}" && mkdir -p "${SDK_INSTALLATION_DIR}"

# These are the tools needed to build Android apps
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platforms;android-34"
Expand Down
3 changes: 2 additions & 1 deletion .ci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ install_ubuntu() {
unzip \
gdb \
rsync \
libssl-dev
libssl-dev \
zip

# Cleanup package manager
apt-get autoclean && apt-get clean
Expand Down
8 changes: 6 additions & 2 deletions .ci/docker/requirements-ci.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
mpmath==1.3.0
numpy==1.25.2
numpy==1.21.3; python_version == '3.10'
numpy==1.23.2; python_version == '3.11'
numpy; python_version >= '3.12'
PyYAML==6.0.1
ruamel.yaml==0.17.32
sympy==1.12
timm==0.6.13
tomli==2.0.1
torchsr==1.0.4
transformers==4.36.0
transformers==4.38.0
zstd==1.5.5.1
pandas==2.0.3; python_version == '3.10'
pandas; python_version >= '3.11'
pytest==7.2.0
pytest-cov==4.1.0
expecttest==0.1.6
Expand Down
5 changes: 5 additions & 0 deletions .ci/docker/ubuntu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,10 @@ COPY --chown=ci-user:ci-user ./arm /opt/arm
# Set up ARM SDK if needed
RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi

ARG QNN_SDK
COPY --chown=ci-user:ci-user ./qualcomm /opt/qualcomm
# Set up QNN SDK if needed
RUN if [ -n "${QNN_SDK}" ]; then git config --global user.email "[email protected]"; git config --global user.name "OSS CI"; fi

USER ci-user
CMD ["bash"]
19 changes: 19 additions & 0 deletions .ci/scripts/build-qnn-sdk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

#######################################
# Build the Qualcomm (QNN) backend by delegating to
# backends/qualcomm/scripts/build.sh.
# Globals:
#   ANDROID_NDK_ROOT (exported) - set to /opt/ndk
#   QNN_SDK_ROOT     (exported) - set to /tmp/qnn/2.23.0.240531
#   EXECUTORCH_ROOT  (exported) - repository root, derived from this
#                                 script's own location
# Arguments: none
# Returns: exit status of the build script.
# Note: build.sh is referenced by a relative path, so the caller's working
#       directory must be the repository root.
#######################################
build_qnn_backend() {
  echo "Start building qnn backend."
  export ANDROID_NDK_ROOT=/opt/ndk
  export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531

  # This script lives in .ci/scripts, so the repository root is two levels
  # above its directory (one level up would only reach .ci).
  # Assign first and export afterwards: `export VAR="$(cmd)"` reports the
  # status of `export`, which would hide a failing `cd`.
  EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
  export EXECUTORCH_ROOT

  bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
}

build_qnn_backend
56 changes: 56 additions & 0 deletions .ci/scripts/build_llama_android.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

#######################################
# Configure, build and install ExecuTorch (with the XNNPACK backend and the
# optimized/quantized/custom kernels) for Android arm64-v8a into
# ./cmake-android-out, starting from an empty build directory.
# Globals:
#   ANDROID_NDK, BUCK2, ANDROID_ABI (written) - left set for later steps
# Arguments: none
# Returns: exit status of the final `cmake --build` invocation.
#######################################
install_executorch_and_backend_lib() {
  echo "Installing executorch and xnnpack backend"

  # Always begin with a fresh build tree.
  rm -rf cmake-android-out && mkdir cmake-android-out

  # Deliberately global: the runner build step reads these afterwards.
  ANDROID_NDK=/opt/ndk
  BUCK2=buck2
  ANDROID_ABI=arm64-v8a

  local -a configure_flags=(
    "-DBUCK2=${BUCK2}"
    "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake"
    "-DANDROID_ABI=${ANDROID_ABI}"
    "-DANDROID_PLATFORM=android-23"
    "-DCMAKE_INSTALL_PREFIX=cmake-android-out"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON"
    "-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON"
    "-DEXECUTORCH_BUILD_XNNPACK=ON"
    "-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON"
    "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
    "-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON"
    "-DXNNPACK_ENABLE_ARM_BF16=OFF"
  )

  # Configure step.
  cmake "${configure_flags[@]}" -Bcmake-android-out .

  # Build and install into the prefix configured above.
  cmake --build cmake-android-out -j4 --target install --config Release
}

#######################################
# Configure and build the llama2 example runner for Android arm64-v8a
# against the cmake-android-out install prefix.
# Globals:
#   BUCK2       (read)    - buck2 executable; defaults to "buck2" when unset
#   ANDROID_NDK (read)    - NDK root; defaults to /opt/ndk when unset
#   ANDROID_ABI (written) - set to arm64-v8a
# Arguments: none
# Returns: exit status of the final `cmake --build` invocation.
#######################################
build_llama_runner() {
  echo "Building llama runner for Android..."

  # Fall back to the same values the install step uses, so this function no
  # longer aborts with "unbound variable" under `set -u` when it is run
  # without that step having assigned BUCK2 / ANDROID_NDK first.
  local buck2="${BUCK2:-buck2}"
  local android_ndk="${ANDROID_NDK:-/opt/ndk}"

  ANDROID_ABI=arm64-v8a
  cmake -DBUCK2="${buck2}" \
    -DCMAKE_TOOLCHAIN_FILE="${android_ndk}/build/cmake/android.toolchain.cmake" \
    -DANDROID_ABI="${ANDROID_ABI}" \
    -DANDROID_PLATFORM=android-23 \
    -DCMAKE_INSTALL_PREFIX=cmake-android-out \
    -DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -Bcmake-android-out/examples/models/llama2 examples/models/llama2

  cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
}
# Script entry point. The steps run strictly in this order:
#   1. install_flatc_from_source - not defined in this file; presumably
#      provided by the utils.sh sourced at the top (TODO confirm).
#   2. install_executorch_and_backend_lib - builds and installs the core
#      libraries into the cmake-android-out prefix.
#   3. build_llama_runner - configured with that same cmake-android-out
#      install prefix, so it has to come after step 2.
install_flatc_from_source
install_executorch_and_backend_lib
build_llama_runner
26 changes: 23 additions & 3 deletions .ci/scripts/gather_test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from examples.models import MODEL_NAME_TO_MODEL
from examples.xnnpack import MODEL_NAME_TO_OPTIONS


DEFAULT_RUNNERS = {
"linux": "linux.2xlarge",
"macos": "macos-m1-stable",
Expand All @@ -24,9 +23,11 @@
"w2l": "linux.12xlarge",
"ic4": "linux.12xlarge",
"resnet50": "linux.12xlarge",
"llava": "linux.12xlarge",
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
"dl3": "linux.12xlarge",
"emformer_join": "linux.12xlarge",
"emformer_predict": "linux.12xlarge",
}
}

Expand All @@ -35,9 +36,11 @@
# Just some examples on how custom timeout can be set
"linux": {
"mobilebert": 90,
"emformer_predict": 360,
},
"macos": {
"mobilebert": 90,
"emformer_predict": 360,
},
}

Expand All @@ -56,7 +59,7 @@ def parse_args() -> Any:
"-e",
"--event",
type=str,
choices=["pull_request", "push"],
choices=["pull_request", "push", "schedule"],
required=True,
help="GitHub CI Event. See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on",
)
Expand All @@ -83,7 +86,21 @@ def model_should_run_on_event(model: str, event: str) -> bool:
We put higher priority and fast models to pull request and rest to push.
"""
if event == "pull_request":
return model in ["add", "ic3", "mv2", "mv3", "resnet18", "vit"]
return model in ["mv3", "vit"]
elif event == "push":
# 'emformer_predict' is running super slow. Only run it periodically
return model not in ["emformer_predict"]
else:
return True


def model_should_run_on_target_os(model: str, target_os: str) -> bool:
    """
    Tell whether `model` is to be tested on `target_os` (linux/macos).

    Some models are excluded on macos (for instance large ones, given the
    limited macos resources); every other model/os combination is allowed.
    """
    # Models that are skipped when the target os is macos.
    skipped_on_macos = ["llava"]
    return not (target_os == "macos" and model in skipped_on_macos)


Expand Down Expand Up @@ -119,6 +136,9 @@ def export_models_for_ci() -> dict[str, dict]:
if not model_should_run_on_event(name, event):
continue

if not model_should_run_on_target_os(name, target_os):
continue

if backend == "xnnpack":
if name not in MODEL_NAME_TO_OPTIONS:
continue
Expand Down
27 changes: 19 additions & 8 deletions .ci/scripts/setup-macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,18 @@ install_buck() {
fi

pushd .ci/docker

# TODO(huydo): This is a one-off copy of buck2 2024-02-15 to unblock Jon and
# TODO(huydo): This is a one-off copy of buck2 2024-05-15 to unblock Jon and
# re-enable ShipIt. It’s not ideal that upgrading buck2 will require a manual
# update the cached binary on S3 bucket too. Let me figure out if there is a
# way to correctly implement the previous setup of installing a new version of
# buck2 only when it’s needed. AFAIK, the complicated part was that buck2
# --version doesn't say anything w.r.t its release version, i.e. 2024-02-15.
# --version doesn't say anything w.r.t its release version, i.e. 2024-05-15.
# See D53878006 for more details.
BUCK2=buck2-aarch64-apple-darwin.zst
#
# If you need to upgrade buck2 version on S3, please reach out to Dev Infra
# team for help.
BUCK2_VERSION=$(cat ci_commit_pins/buck2.txt)
BUCK2=buck2-aarch64-apple-darwin-${BUCK2_VERSION}.zst
curl -s "https://ossci-macos.s3.amazonaws.com/${BUCK2}" -o "${BUCK2}"

zstd -d "${BUCK2}" -o buck2
Expand Down Expand Up @@ -74,16 +77,18 @@ install_sccache() {

export PATH="${SCCACHE_PATH}:${PATH}"

# Create temp directory for sccache shims
TMP_DIR=$(mktemp -d)
trap 'rm -rfv ${TMP_DIR}' EXIT
# Create temp directory for sccache shims if TMP_DIR doesn't exist
if [ -z "${TMP_DIR:-}" ]; then
TMP_DIR=$(mktemp -d)
trap 'rm -rfv ${TMP_DIR}' EXIT
export PATH="${TMP_DIR}:$PATH"
fi

write_sccache_stub "${TMP_DIR}/c++"
write_sccache_stub "${TMP_DIR}/cc"
write_sccache_stub "${TMP_DIR}/clang++"
write_sccache_stub "${TMP_DIR}/clang"

export PATH="${TMP_DIR}:$PATH"
sccache --zero-stats || true
}

Expand All @@ -104,6 +109,12 @@ print_cmake_info() {
codesign -f -s - "${CMAKE_EXEC}" || true
}

#######################################
# Export CMAKE_PREFIX_PATH pointing at the site-packages directory of the
# `python` found on PATH.
# Globals:
#   CMAKE_PREFIX_PATH (written, exported)
# Arguments: none
#######################################
setup_macos_env_variables() {
  # Prefer the original distutils lookup so existing environments resolve to
  # the same directory as before. distutils is no longer part of the standard
  # library from Python 3.12 on, so fall back to sysconfig's "purelib" path
  # when that first lookup fails. stderr of the first attempt is dropped
  # because a failure there is expected and handled by the fallback.
  CMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())' 2>/dev/null) \
    || CMAKE_PREFIX_PATH=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
  export CMAKE_PREFIX_PATH
}

setup_macos_env_variables
# NB: we need buck2 in all cases because cmake build also depends on calling
# buck2 atm
install_buck
Expand Down
29 changes: 29 additions & 0 deletions .ci/scripts/setup-qnn-deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -ex

#######################################
# Download the QNN SDK archive, unpack it under /tmp and move the contents
# of /tmp/qairt into the installation directory.
# Globals:
#   QNN_INSTALLATION_DIR (written) - set to /tmp/qnn
# Arguments: none
# Returns: non-zero if the download fails; otherwise the status of the
#          final directory listing.
#######################################
install_qnn() {
  echo "Start installing qnn."
  QNN_INSTALLATION_DIR=/tmp/qnn
  mkdir -p "${QNN_INSTALLATION_DIR}"

  local -r sdk_archive=/tmp/v2.23.0.24.06.24.zip
  local -r sdk_url="https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.23.0.24.06.24.zip"

  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page as the "zip", which would only surface later as a
  # confusing unzip failure.
  curl --fail -Lo "${sdk_archive}" "${sdk_url}" || return 1
  echo "Finishing downloading qnn sdk."
  unzip -qo "${sdk_archive}" -d /tmp
  echo "Finishing unzip qnn sdk."

  # Print the content for manual verification
  ls -lah "/tmp/qairt"
  mv "/tmp/qairt"/* "${QNN_INSTALLATION_DIR}"
  echo "Finishing installing qnn '${QNN_INSTALLATION_DIR}' ."

  ls -lah "${QNN_INSTALLATION_DIR}"
}

install_qnn
Loading
Loading