Commit aea9736

Merge branch 'main' into jz/remove-ckpt-ci
2 parents: 87b9f32 + 07266f9


41 files changed: +2736 −124 lines

.ci/scripts/test_model.sh

Lines changed: 6 additions & 3 deletions
@@ -224,19 +224,22 @@ test_model_with_coreml() {
 
   "${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}"
   EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
-  # TODO:
+
   if [ -n "$EXPORTED_MODEL" ]; then
     EXPORTED_MODEL_WITH_DTYPE="${EXPORTED_MODEL%.pte}_${DTYPE}.pte"
     mv "$EXPORTED_MODEL" "$EXPORTED_MODEL_WITH_DTYPE"
     EXPORTED_MODEL="$EXPORTED_MODEL_WITH_DTYPE"
-    echo "Renamed file path: $EXPORTED_MODEL"
+    echo "OK exported model: $EXPORTED_MODEL"
   else
-    echo "No .pte file found"
+    echo "[error] failed to export model: no .pte file found"
     exit 1
   fi
 
   # Run the model
   if [ "${should_test}" = true ]; then
+    echo "Installing requirements needed to build coreml_executor_runner..."
+    backends/apple/coreml/scripts/install_requirements.sh
+
     echo "Testing exported model with coreml_executor_runner..."
     local out_dir=$(mktemp -d)
     COREML_EXECUTOR_RUNNER_OUT_DIR="${out_dir}" examples/apple/coreml/scripts/build_executor_runner.sh

.ci/scripts/wheel/test_macos.py

Lines changed: 4 additions & 6 deletions
@@ -15,11 +15,9 @@
             model=Model.Mv3,
             backend=Backend.XnnpackQuantizationDelegation,
         ),
-        # Enable this once CoreML is suppported out-of-the-box
-        # https://github.com/pytorch/executorch/issues/9019
-        # test_base.ModelTest(
-        #     model=Model.Mv3,
-        #     backend=Backend.CoreMlTest,
-        # )
+        test_base.ModelTest(
+            model=Model.Mv3,
+            backend=Backend.CoreMlTest,
+        ),
     ]
 )

.github/workflows/build-wheels-linux.yml

Lines changed: 3 additions & 0 deletions
@@ -6,6 +6,9 @@ on:
     paths:
       - .ci/**/*
       - .github/workflows/build-wheels-linux.yml
+      - examples/**/*
+      - pyproject.toml
+      - setup.py
   push:
     branches:
       - nightly

.github/workflows/build-wheels-macos.yml

Lines changed: 6 additions & 1 deletion
@@ -6,6 +6,9 @@ on:
     paths:
       - .ci/**/*
       - .github/workflows/build-wheels-macos.yml
+      - examples/**/*
+      - pyproject.toml
+      - setup.py
   push:
     branches:
       - nightly
@@ -57,6 +60,8 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       post-script: ${{ matrix.post-script }}
       package-name: ${{ matrix.package-name }}
-      runner-type: macos-m1-stable
+      # Meta's macOS runners do not have Xcode, so use GitHub's runners.
+      runner-type: macos-latest-xlarge
+      setup-miniconda: true
       smoke-test-script: ${{ matrix.smoke-test-script }}
       trigger-event: ${{ github.event_name }}

.github/workflows/trunk.yml

Lines changed: 10 additions & 4 deletions
@@ -65,22 +65,29 @@ jobs:
       matrix:
         model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
         backend: [portable, xnnpack-quantization-delegation]
+        runner: [linux.arm64.2xlarge]
         include:
           - model: lstm
             backend: portable
+            runner: linux.arm64.2xlarge
           - model: mul
             backend: portable
+            runner: linux.arm64.2xlarge
           - model: softmax
             backend: portable
+            runner: linux.arm64.2xlarge
           - model: phi_4_mini
             backend: portable
+            runner: linux.arm64.m7g.4xlarge
           - model: qwen2_5
             backend: portable
+            runner: linux.arm64.2xlarge
           - model: llama3_2_vision_encoder
             backend: portable
+            runner: linux.arm64.2xlarge
       fail-fast: false
     with:
-      runner: linux.arm64.2xlarge
+      runner: ${{ matrix.runner }}
       docker-image: executorch-ubuntu-22.04-gcc11-aarch64
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -536,9 +543,8 @@ jobs:
           git clone https://github.com/huggingface/optimum-executorch
           cd optimum-executorch
           # There is no release yet, for CI stability, always test from the same commit on main
-          git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
-          pip install .
-          pip install accelerate sentencepiece
+          git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+          pip install .[tests]
           pip list
           echo "::endgroup::"

backends/arm/test/conftest.py

Lines changed: 4 additions & 1 deletion
@@ -44,7 +44,10 @@ def pytest_configure(config):
         )
     # Only enable if we also have the TOSA reference model available.
     pytest._test_options["corstone_fvp"] = True  # type: ignore[attr-defined]
-    pytest._test_options["llama_inputs"] = config.option.llama_inputs  # type: ignore[attr-defined]
+
+    if getattr(config.option, "llama_inputs", False) and config.option.llama_inputs:
+        pytest._test_options["llama_inputs"] = config.option.llama_inputs  # type: ignore[attr-defined]
+
     pytest._test_options["fast_fvp"] = False  # type: ignore[attr-defined]
     if getattr(config.option, "fast_fvp", False):
         pytest._test_options["fast_fvp"] = config.option.fast_fvp  # type: ignore[attr-defined]

backends/arm/test/misc/test_debug_feats.py

Lines changed: 2 additions & 2 deletions
@@ -197,10 +197,10 @@ def test_collate_tosa_BI_tests(self):
             "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests"
         )
         assert os.path.exists(
-            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/output_tag6.tosa"
+            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/output_tag6_TOSA-0.80+BI.tosa"
         )
         assert os.path.exists(
-            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/desc_tag6.json"
+            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/desc_tag6_TOSA-0.80+BI.json"
         )
 
         os.environ.pop("TOSA_TESTCASES_BASE_PATH")

backends/arm/tosa_backend.py

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@ def preprocess(  # noqa: C901
             dbg_tosa_dump(
                 tosa_graph,
                 artifact_path,
-                suffix="{}".format(f"_{tag}" if tag else ""),
+                suffix="{}".format(f"_{tag}" if tag else "") + (f"_{tosa_spec}"),
             )
 
     # Serialize and return the TOSA flatbuffer.

backends/vulkan/runtime/api/Context.cpp

Lines changed: 8 additions & 5 deletions
@@ -272,7 +272,7 @@ Context* context() {
 
 VkPipeline Context::get_shader_pipeline(
     const vkapi::ShaderInfo& shader,
-    const vkapi::SpecVarList& spec_constants) {
+    const vkapi::SpecVarList& additional_constants) {
   const uint32_t push_constants_size = 128u;
 
   VkDescriptorSetLayout shader_layout =
@@ -281,12 +281,15 @@ VkPipeline Context::get_shader_pipeline(
       pipeline_layout_cache().retrieve(shader_layout, push_constants_size);
 
   const utils::WorkgroupSize local_workgroup_size(4u, 4u, 1u);
+  vkapi::SpecVarList spec_constants = {
+      SV(local_workgroup_size[0u]),
+      SV(local_workgroup_size[1u]),
+      SV(local_workgroup_size[2u])};
+
+  spec_constants.append(additional_constants);
 
   VkPipeline pipeline = pipeline_cache().retrieve(
-      {pipeline_layout,
-       shader_cache().retrieve(shader),
-       spec_constants,
-       local_workgroup_size});
+      {pipeline_layout, shader_cache().retrieve(shader), spec_constants});
 
   return pipeline;
 }
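Note on the Context.cpp change: the fixed 4×4×1 local workgroup size is now emitted as the first three specialization constants (consumed by the `layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;` qualifiers in the shaders below), and the workgroup size no longer forms part of the pipeline-cache key. A minimal standalone sketch of that prepend-then-append layout, using a plain std::vector in place of vkapi::SpecVarList (illustrative only, not the commit's API):

// Standalone illustration of the spec-constant layout introduced above.
// A plain std::vector stands in for vkapi::SpecVarList; values are the
// constant data only, keyed by their position (constant ID).
#include <cstdint>
#include <vector>

std::vector<uint32_t> build_spec_constants(
    const std::vector<uint32_t>& additional_constants) {
  // Constant IDs 0..2 always carry the 4x4x1 local workgroup size.
  std::vector<uint32_t> spec_constants = {4u, 4u, 1u};
  // Shader-specific constants are appended after the workgroup size,
  // so callers now pass only their own constants.
  spec_constants.insert(
      spec_constants.end(),
      additional_constants.begin(),
      additional_constants.end());
  return spec_constants;
}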

backends/vulkan/runtime/graph/ops/glsl/clone.glsl

Lines changed: 6 additions & 7 deletions
@@ -8,16 +8,15 @@
 
 #version 450 core
 
+#include "indexing_utils.h"
+
 #define PRECISION ${PRECISION}
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
-
-layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits {
-  ivec3 out_limits;
-};
+${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_ubo(B, "ivec3", "out_limits")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -26,5 +25,5 @@ void main() {
   if (any(greaterThanEqual(pos, out_limits))) {
     return;
   }
-  imageStore(image_out, pos, texelFetch(image_in, pos, 0));
+  imageStore(t_out, pos, load_texel(t_in, pos));
 }

backends/vulkan/runtime/graph/ops/glsl/clone.yaml

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@ clone:
   parameter_names_with_default_values:
     DTYPE: float
     NDIM: 3
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half

backends/vulkan/runtime/graph/ops/glsl/full.glsl

Lines changed: 4 additions & 10 deletions
@@ -18,15 +18,9 @@
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-
-layout(set = 0, binding = 1) uniform PRECISION restrict Sizes {
-  ivec4 sizes;
-};
-
-layout(set = 0, binding = 2) uniform PRECISION restrict FillVal {
-  float fill_value;
-};
+${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_ubo(B, "ivec4", "sizes")}
+${layout_declare_ubo(B, "float", "fill_value")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -50,5 +44,5 @@ void main() {
     outtex = outtex * valid_idx;
   }
 
-  imageStore(image_out, POS, outtex);
+  imageStore(t_out, POS, outtex);
 }

backends/vulkan/runtime/graph/ops/glsl/full.yaml

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ full:
     NDIM: 3
     DTYPE: float
     PACKING: C_packed
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half

backends/vulkan/runtime/graph/ops/glsl/max_pool2d.glsl

Lines changed: 9 additions & 21 deletions
@@ -15,24 +15,12 @@
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-layout(set = 0, binding = 1, ${IMAGE_FORMAT["int"]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM]["int"]} image_idx;
-layout(set = 0, binding = 2) uniform PRECISION sampler3D image_in;
-
-layout(set = 0, binding = 3) uniform PRECISION restrict OutLimits {
-  ivec3 out_limits;
-};
-
-layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
-  ivec4 in_sizes;
-};
-
-layout(set = 0, binding = 5) uniform PRECISION restrict Params {
-  ivec2 kernel_size;
-  ivec2 stride;
-  ivec2 padding;
-  ivec2 dilation;
-};
+${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(B, "w", "t_idx", "int", STORAGE)}
+${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_ubo(B, "ivec3", "out_limits")}
+${layout_declare_ubo(B, "ivec4", "in_sizes")}
+${layout_declare_ubo(B, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -54,7 +42,7 @@ void main() {
   for (int y = start.y; y < end.y; y += dilation.y) {
     for (int x = start.x; x < end.x; x += dilation.x) {
       if ((x >= 0 && x < in_sizes.x) && (y >= 0 && y < in_sizes.y)) {
-        const vec4 cur_texel = texelFetch(image_in, ivec3(x, y, pos.z), 0);
+        const vec4 cur_texel = load_texel(t_in, ivec3(x, y, pos.z));
 
         // Set idx if value is greatest in the pool; else, keep the existing idx.
         ivec4 cur_idx = ivec4(x + int(in_sizes.x) * y);
@@ -66,6 +54,6 @@ void main() {
     }
   }
 
-  imageStore(image_out, pos, out_texel);
-  imageStore(image_idx, pos, idx_texel);
+  imageStore(t_out, pos, out_texel);
+  imageStore(t_idx, pos, idx_texel);
 }

backends/vulkan/runtime/graph/ops/glsl/max_pool2d.yaml

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ max_pool2d:
   parameter_names_with_default_values:
     NDIM: 3
    DTYPE: float
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half

backends/vulkan/runtime/graph/ops/glsl/permute.glsl

Lines changed: 4 additions & 4 deletions
@@ -16,8 +16,8 @@ layout(std430) buffer;
 
 #include "indexing_utils.h"
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-layout(set = 0, binding = 1) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
+${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
 
 layout(push_constant) uniform PRECISION restrict Block {
   ivec4 out_limits;
@@ -72,7 +72,7 @@ void main() {
     fetch_pos[packed_dim] >>= 2;
 
     // fetch input texel
-    VEC4_T inval = VEC4_T(texelFetch(image_in, fetch_pos, 0));
+    VEC4_T inval = VEC4_T(load_texel(t_in, fetch_pos));
     outval[j] = inval[in_packed_dim_lane_index];
 
     // go to next position in the input, that is mapped to the packed dim in the output
@@ -81,5 +81,5 @@ void main() {
 
   pos[packed_dim] = int(gl_GlobalInvocationID[packed_dim]);
 
-  imageStore(image_out, pos, outval);
+  imageStore(t_out, pos, outval);
 }

backends/vulkan/runtime/graph/ops/glsl/permute.yaml

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@ permute:
   parameter_names_with_default_values:
     DTYPE: float
     NDIM: 3
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half

backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl

Lines changed: 4 additions & 3 deletions
@@ -90,9 +90,10 @@ void main() {
 
 void main() {
   const u16vec2 out_pos = u16vec2(
-      gl_GlobalInvocationID.x / out_limits.y,
-      gl_GlobalInvocationID.x % out_limits.y);
-  if (out_pos.x >= out_limits.x) {
+      gl_GlobalInvocationID.x,
+      gl_GlobalInvocationID.y);
+
+  if (out_pos.x >= out_limits.x || out_pos.y >= out_limits.y) {
     return;
   }
 
backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,37 @@ void add_q_8w_linear_node(
114114
graph.sizes_ubo(mat1_W_packed)});
115115
}
116116

117-
// set global work group size to be 1 dimensional
118-
const utils::uvec3 wg_size = {
119-
static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1};
117+
utils::uvec3 global_wg;
118+
if (graph.is_buffer_storage(out)) {
119+
global_wg = {static_cast<uint32_t>(graph.numel_of(out_W_packed)), 1, 1};
120+
} else {
121+
global_wg = graph.logical_limits_of(out_W_packed);
122+
}
123+
124+
utils::uvec3 local_wg{8, 8, 1};
125+
int32_t out_W = graph.size_at<int32_t>(-1, out_W_packed);
126+
127+
if (graph.is_buffer_storage(out_W_packed)) {
128+
local_wg[0] = 64;
129+
local_wg[1] = 1;
130+
local_wg[2] = 1;
131+
} else {
132+
if (out_W % 8 != 0) {
133+
if (out_W % 4 == 0) {
134+
local_wg[0] = 4;
135+
local_wg[1] = 16;
136+
} else {
137+
local_wg[0] = 2;
138+
local_wg[1] = 32;
139+
}
140+
}
141+
}
120142

121143
graph.execute_nodes().emplace_back(new DispatchNode(
122144
graph,
123145
VK_KERNEL_FROM_STR(kernel_name),
124-
wg_size,
125-
graph.create_local_wg_size(wg_size),
146+
global_wg,
147+
local_wg,
126148
// Inputs and Outputs
127149
{{out_W_packed, vkapi::MemoryAccessType::WRITE},
128150
{{mat1_W_packed, q_mat2, scales}, vkapi::MemoryAccessType::READ}},
