Skip to content

Commit c7d9d3b

Browse files
committed
Update on "[Executorch][llama] Make RoPE freq calculation broadcast for per head"
This is a workaround, and may not even be worth landing, to avoid broadcasting semantics in the mul op and, for that matter, in any binary op. The current implementation of the optimized ops doesn't handle broadcasting and falls back to the portable op implementation. This diff also fixes an issue where (as seen in llama) the two tensors of a binary op are not broadcasting but have a different number of dims, which results in invocation of the unoptimized path, e.g. a = [1, 1, 2048], b = [2048], out = [1, 1, 2048]. In the llama case this is the optimized path when generating one token at a time, but not during pre-fill. Making the optimized op handle broadcasting, and support vectorization, is not hard, but may take some time. Differential Revision: [D54766067](https://our.internmc.facebook.com/intern/diff/D54766067/) [ghstack-poisoned]
2 parents bbe9b9e + 9c38abd commit c7d9d3b

File tree

106 files changed

+2983
-1005
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+2983
-1005
lines changed

.ci/docker/build.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ case "${IMAGE_NAME}" in
3737
ARM_SDK=yes
3838
CLANG_VERSION=12
3939
;;
40+
executorch-ubuntu-22.04-clang12-android)
41+
LINTRUNNER=""
42+
CLANG_VERSION=12
43+
# From https://developer.android.com/ndk/downloads
44+
ANDROID_NDK_VERSION=r26c
45+
;;
4046
*)
4147
echo "Invalid image name ${IMAGE_NAME}"
4248
exit 1
@@ -66,6 +72,7 @@ docker build \
6672
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
6773
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
6874
--build-arg "ARM_SDK=${ARM_SDK:-}" \
75+
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
6976
-f "${OS}"/Dockerfile \
7077
"$@" \
7178
.

.ci/docker/common/install_android.sh

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -ex
9+
10+
# Double check if the NDK version is set
11+
[ -n "${ANDROID_NDK_VERSION}" ]
12+
13+
# Installs the apt packages this script depends on (OpenJDK 17, the Java CA
# certificates package and Ant), then removes apt caches and temporary files.
install_prerequiresites() {
14+
apt-get update
15+
16+
# NB: Need OpenJDK 17 at the minimum
17+
apt-get install -y --no-install-recommends \
18+
openjdk-17-jdk \
19+
ca-certificates-java \
20+
ant
21+
22+
# Cleanup package manager
23+
apt-get autoclean && apt-get clean
24+
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
25+
}
26+
27+
# Downloads the Android NDK archive for ${ANDROID_NDK_VERSION} into /tmp,
# unpacks it there and moves its contents into /opt/ndk.
install_ndk() {
28+
NDK_INSTALLATION_DIR=/opt/ndk
29+
mkdir -p "${NDK_INSTALLATION_DIR}"
30+
31+
pushd /tmp
32+
# The NDK installation is cached on ossci-android S3 bucket
33+
curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
34+
unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
35+
36+
# Print the content for manual verification
37+
ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
# Move the unpacked contents (not the versioned directory itself) so the
# install location does not depend on the NDK version.
38+
mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
39+
40+
popd
41+
}
42+
43+
# Downloads the pinned Android command-line tools archive into /tmp and
# unpacks it under /opt; the listing below expects /opt/cmdline-tools/bin.
install_cmdtools() {
44+
CMDTOOLS_FILENAME=commandlinetools-linux-11076708_latest.zip
45+
46+
pushd /tmp
47+
# The file is cached on ossci-android S3 bucket
48+
curl -Os --retry 3 "https://ossci-android.s3.us-west-1.amazonaws.com/${CMDTOOLS_FILENAME}"
49+
unzip -qo "${CMDTOOLS_FILENAME}" -d /opt
50+
51+
# Print the unpacked tools for manual verification
ls -lah /opt/cmdline-tools/bin
52+
popd
53+
}
54+
55+
# Installs the Android SDK packages into /opt/android/sdk using the
# sdkmanager binary unpacked by install_cmdtools.
# `yes |` feeds "y" to every sdkmanager prompt (presumably the license
# prompts -- confirm) so the install can run unattended.
install_sdk() {
56+
SDK_INSTALLATION_DIR=/opt/android/sdk
57+
mkdir -p "${SDK_INSTALLATION_DIR}"
58+
59+
# These are the tools needed to build Android apps
60+
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platforms;android-34"
61+
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;33.0.1"
62+
# And some more tools for future emulator tests
63+
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platform-tools"
64+
yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "tools"
65+
}
66+
67+
install_prerequiresites
68+
install_ndk
69+
install_cmdtools
70+
install_sdk

.ci/docker/ubuntu/Dockerfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@ COPY ./requirements-lintrunner.txt requirements-lintrunner.txt
7171
RUN if [ -n "${LINTRUNNER}" ]; then bash ./install_linter.sh; fi
7272
RUN rm install_linter.sh utils.sh requirements-lintrunner.txt
7373

74+
ARG ANDROID_NDK_VERSION
75+
# Install the Android NDK and SDK if needed
76+
COPY ./common/install_android.sh install_android.sh
77+
RUN if [ -n "${ANDROID_NDK_VERSION}" ]; then bash ./install_android.sh; fi
78+
RUN rm install_android.sh
79+
7480
ARG ARM_SDK
7581
COPY --chown=ci-user:ci-user ./arm /opt/arm
7682
# Set up ARM SDK if needed

.github/workflows/android.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: Build ExecuTorch Android demo apps
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- release/*
8+
pull_request:
9+
paths:
10+
- .ci/docker/**
11+
- .github/workflows/android.yml
12+
- install_requirements.sh
13+
- examples/demo-apps/**
14+
- extension/module/**
15+
workflow_dispatch:
16+
17+
concurrency:
18+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
19+
cancel-in-progress: true
20+
21+
jobs:
22+
test-demo-android:
23+
name: test-demo-android
24+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
25+
strategy:
26+
matrix:
27+
include:
28+
- build-tool: buck2
29+
with:
30+
# NB: The example model dl3 requires lots of memory (T161064121)
31+
runner: linux.12xlarge
32+
docker-image: executorch-ubuntu-22.04-clang12-android
33+
submodules: 'true'
34+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
35+
timeout: 90
36+
script: |
37+
set -eux
38+
39+
# The generic Linux job chooses to use base env, not the one setup by the image
40+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
41+
conda activate "${CONDA_ENV}"
42+
43+
BUILD_TOOL=${{ matrix.build-tool }}
44+
# Set up the build dependencies on the Linux runner
45+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
46+
# Build Android demo app
47+
bash build/test_android_ci.sh

.github/workflows/apple.yml

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@ on:
55
branches:
66
- main
77
- release/*
8+
paths:
9+
- .ci/docker/**
10+
- .github/workflows/apple.yml
11+
- install_requirements.sh
12+
- backends/apple/**
13+
- build/build_apple_frameworks.sh
14+
- build/create_frameworks.sh
15+
- build/test_ios_ci.sh
16+
- examples/demo-apps/**
17+
- extension/apple/**
18+
- extension/module/**
819
pull_request:
920
paths:
1021
- .ci/docker/**
@@ -92,9 +103,9 @@ jobs:
92103
build/build_apple_frameworks.sh --coreml --mps --portable --xnnpack
93104
94105
# Bundle iOS Frameworks
95-
for FRAMEWORK in "${FRAMEWORKS[@]}"; do
96-
zip -r "${RUNNER_TEMP}/artifacts/${FRAMEWORK}-${VERSION}.zip" "cmake-out/${FRAMEWORK}.xcframework" LICENSE
97-
done
106+
for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
107+
cd cmake-out && zip -r "${RUNNER_TEMP}/artifacts/${FRAMEWORK}-${VERSION}.zip" "${FRAMEWORK}.xcframework"
108+
) done
98109
99110
popd
100111
@@ -122,8 +133,8 @@ jobs:
122133
# NB: The name here needs to match the upload-artifact name from build-frameworks-ios job
123134
name: executorch-frameworks-ios
124135
path: ${{ runner.temp }}/frameworks-ios/
125-
- name: Only push to S3 from main branch
126-
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
136+
- name: Only push to S3 when running the workflow manually from main branch
137+
if: ${{ github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main' }}
127138
shell: bash
128139
run: |
129140
set -eux

.github/workflows/docker-builds.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ jobs:
3838
- docker-image-name: executorch-ubuntu-22.04-clang12
3939
- docker-image-name: executorch-ubuntu-22.04-linter
4040
- docker-image-name: executorch-ubuntu-22.04-arm-sdk
41+
- docker-image-name: executorch-ubuntu-22.04-clang12-android
4142
env:
4243
DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}
4344
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ buck-out/
33
cmake-out/
44
cmake-android-out/
55
cmake-ios-out/
6+
ethos-u-scratch/
67
executorch.egg-info
78
__pycache__/
89
build/lib/

CMakeLists.txt

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL
152152
option(EXECUTORCH_BUILD_GTESTS
153153
"Build googletest based test binaries" OFF)
154154

155+
# Build the AOT util extension library
156+
option(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL
157+
"Build the extension/aot_util directory" OFF)
158+
155159
if(NOT BUCK2)
156160
set(BUCK2 buck2)
157161
endif()
@@ -236,11 +240,11 @@ if(EXECUTORCH_BUILD_FLATC)
236240
)
237241
endif()
238242
set(FLATC_EXECUTABLE flatc)
239-
option(FLATBUFFERS_BUILD_FLATC "" ON)
240-
option(FLATBUFFERS_BUILD_FLATHASH "" OFF)
241-
option(FLATBUFFERS_BUILD_FLATLIB "" OFF)
242-
option(FLATBUFFERS_BUILD_TESTS "" OFF)
243-
option(FLATBUFFERS_INSTALL "" OFF)
243+
set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "")
244+
set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "")
245+
set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "")
246+
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "")
247+
set(FLATBUFFERS_INSTALL OFF CACHE BOOL "")
244248
add_subdirectory(third-party/flatbuffers)
245249
endif()
246250
if(NOT FLATC_EXECUTABLE)
@@ -358,6 +362,10 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
358362
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
359363
endif()
360364

365+
if(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL)
366+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/aot_util)
367+
endif()
368+
361369
option(EXECUTORCH_BUILD_XNNPACK "Build the backends/xnnpack directory" OFF)
362370
if(EXECUTORCH_BUILD_XNNPACK)
363371
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
@@ -453,7 +461,6 @@ if(EXECUTORCH_BUILD_PYBIND)
453461
etdump
454462
executorch
455463
extension_data_loader
456-
flatcc
457464
portable_ops_lib
458465
util
459466
torch

Package.swift

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// swift-tools-version:5.9
2+
/*
3+
* Copyright (c) Meta Platforms, Inc. and affiliates.
4+
* All rights reserved.
5+
*
6+
* This source code is licensed under the BSD-style license found in the
7+
* LICENSE file in the root directory of this source tree.
8+
*/
9+
10+
import PackageDescription
11+
12+
// Base URL that each framework archive URL is built from.
let url = "https://ossci-ios.s3.amazonaws.com/executorch"
13+
// Version string embedded in each archive file name ("<name>-<version>.zip").
let version = "0.1.0"
14+
// Per-framework checksums passed to the binary targets below. The names
// indicate SHA-256 digests of the archives; they presumably need updating
// whenever the published archives change -- confirm against the release flow.
let coreml_sha256 = "1d2b8d2a5805a699eb39f347977894d3af5dfa763b298b926016550e9ffefda5"
15+
let executorch_sha256 = "39f19740a7c656d972e6082bae49583a6d4cc6396dea6ace2e4193688cef6225"
16+
let mps_sha256 = "866739b76baec70e603d331ff34ff9f028202fef69161f63a35d2e8a0cf502e9"
17+
let portable_sha256 = "6f761c0ae5651002e321bc6320604476ba0210f9383e535a2905cc1a74be55a3"
18+
let xnnpack_sha256 = "ef2cb2145a466a0a9e32489497c7f4880e4b582cea3883158b7ae24427d8ae7a"
19+
20+
/// Describes one prebuilt framework: its name and the checksum of its archive.
struct Framework {
21+
/// Framework name; used both as the target name and in the archive file name.
let name: String
22+
/// Checksum handed to the binary target for the downloaded archive.
let checksum: String
23+
24+
/// Builds the binary target for this framework, pointing at
/// "<url>/<name>-<version>.zip" using the file-level `url` and `version`.
func target() -> Target {
25+
.binaryTarget(
26+
name: name,
27+
url: "\(url)/\(name)-\(version).zip",
28+
checksum: checksum
29+
)
30+
}
31+
}
32+
33+
// All frameworks exposed by this package, each paired with its checksum
// constant. The package's products and targets are both derived from this list.
let frameworks = [
34+
Framework(
35+
name: "coreml_backend",
36+
checksum: coreml_sha256
37+
),
38+
Framework(
39+
name: "executorch",
40+
checksum: executorch_sha256
41+
),
42+
Framework(
43+
name: "mps_backend",
44+
checksum: mps_sha256
45+
),
46+
Framework(
47+
name: "portable_backend",
48+
checksum: portable_sha256
49+
),
50+
Framework(
51+
name: "xnnpack_backend",
52+
checksum: xnnpack_sha256
53+
)
54+
]
55+
56+
// Package manifest: one library product and one binary target per entry in
// `frameworks`, for iOS 15 and later.
let package = Package(
57+
name: "executorch",
58+
platforms: [
59+
.iOS(.v15),
60+
],
61+
// Each library product wraps the binary target that has the same name.
products: frameworks.map { framework in
62+
.library(name: framework.name, targets: [framework.name])
63+
},
64+
targets: frameworks.map { $0.target() }
65+
)

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
Partitioner,
1818
PartitionResult,
1919
)
20+
from executorch.exir.backend.utils import tag_constant_data
2021
from torch.export.exported_program import ExportedProgram
2122
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
2223
from torch.fx.passes.operator_support import OperatorSupportBase
@@ -87,6 +88,8 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
8788
node.meta["delegation_tag"] = tag
8889
partition_tags[tag] = self.delegation_spec
8990

91+
tag_constant_data(exported_program)
92+
9093
return PartitionResult(
9194
tagged_exported_program=exported_program, partition_tags=partition_tags
9295
)

backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
2222
ETCoreMLErrorBrokenModel, // CoreML model doesn't match the input and output specification.
2323
ETCoreMLErrorCompilationFailed, // CoreML model failed to compile.
2424
ETCoreMLErrorModelCompilationNotSupported, // CoreML model compilation is not supported by the target.
25+
ETCoreMLErrorModelProfilingNotSupported, // Model profiling is not supported by the target.
2526
ETCoreMLErrorModelSaveFailed, // Failed to save CoreML model to disk.
2627
ETCoreMLErrorModelCacheCreationFailed, // Failed to create model cache.
2728
ETCoreMLErrorInternalError, // Internal error.

backends/apple/coreml/runtime/inmemoryfs/inmemory_filesystem_py.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <pybind11/pybind11.h>
1515
#include <pybind11/pytypes.h>
1616
#include <sstream>
17+
#include <stdexcept>
1718
#include <string>
1819
#include <sys/mman.h>
1920
#include <system_error>
@@ -151,8 +152,38 @@ pybind11::bytes flatten_directory_contents(const std::string& path) {
151152
return bytes == nullptr ? pybind11::none() : pybind11::reinterpret_steal<pybind11::object>((PyObject*)bytes);
152153
}
153154

155+
/// Unflattens and writes the contents of the memory buffer at the specified path.
156+
///
157+
/// @param bytes The bytes returned from `flatten_directory_contents`.
158+
/// @param path The directory path
159+
bool unflatten_directory_contents(pybind11::bytes bytes, const std::string& path) {
160+
using namespace inmemoryfs;
161+
162+
char* buffer = nullptr;
163+
ssize_t length = 0;
164+
if (PYBIND11_BYTES_AS_STRING_AND_SIZE(bytes.ptr(), &buffer, &length)) {
165+
pybind11::pybind11_fail("Failed to extract contents of bytes object!");
166+
}
167+
std::shared_ptr<MemoryBuffer> memory_buffer =
168+
MemoryBuffer::make_unowned((void*)buffer, static_cast<size_t>(length));
169+
auto fs = inmemoryfs::make_from_buffer(memory_buffer);
170+
if (!fs) {
171+
pybind11::pybind11_fail("Failed to de-serialize bytes object!");
172+
return false;
173+
}
174+
std::error_code ec;
175+
std::filesystem::path fs_path(path);
176+
auto canonical_path = std::filesystem::canonical(fs_path);
177+
if (!fs->write_item_to_disk({}, canonical_path, true, ec)) {
178+
pybind11::pybind11_fail("Failed to write the item to disk!");
179+
return false;
180+
}
181+
182+
return true;
183+
}
154184
} // namespace executorchcoreml
155185

156186
PYBIND11_MODULE(executorchcoreml, mod) {
157187
mod.def("flatten_directory_contents", &executorchcoreml::flatten_directory_contents);
188+
mod.def("unflatten_directory_contents", &executorchcoreml::unflatten_directory_contents);
158189
}

0 commit comments

Comments
 (0)