Skip to content

Fix flaky issues on MacOS CI (buck2, libzstd) #200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .ci/scripts/setup-macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,28 @@ install_buck() {
fi
}

# Same rpath workaround as the PyTorch macOS setup script:
# https://github.com/pytorch/pytorch/blob/main/.ci/pytorch/macos-common.sh
#
# Looks up cmake with `which`, prints its location, exports it as CMAKE_EXEC,
# then adds an rpath entry to the binary and signs it again. Both of those
# last two steps are best-effort: a failure in either one is ignored.
print_cmake_info() {
  CMAKE_EXEC=$(which cmake)
  export CMAKE_EXEC
  echo "$CMAKE_EXEC"

  # Put the conda env lib folder on cmake's rpath. This addresses a flaky
  # failure where cmake's dependencies couldn't be found, which seems to come
  # from how conda links $CMAKE_EXEC to its package cache when cloning a new
  # environment.
  if ! install_name_tool -add_rpath @executable_path/../lib "${CMAKE_EXEC}"; then
    : # best-effort, keep going
  fi

  # Adding the rpath invalidates cmake's signature, so sign it again to trust
  # the executable. Otherwise it fails with EXC_BAD_ACCESS (SIGKILL (Code
  # Signature Invalid)) and exit code 137.
  if ! codesign -f -s - "${CMAKE_EXEC}"; then
    : # best-effort, keep going
  fi
}

# Top-level setup sequence; the steps run in the order listed.
# NB: we need buck2 in all cases because cmake build also depends on calling
# buck2 at the moment
install_buck
install_conda
install_pip_dependencies
# Add the rpath entry to cmake and re-sign it before the build step below.
# NOTE(review): presumably cmake is provided by one of the install steps above,
# which is why this runs after them — confirm.
print_cmake_info
install_executorch
# BUILD_TOOL is not set in the visible part of this script.
# NOTE(review): presumably it selects the buck2 or cmake build — confirm.
build_executorch_runner "${BUILD_TOOL}"
18 changes: 15 additions & 3 deletions .ci/scripts/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Reset buck2 state: kill it, clean if the kill succeeded, then remove
# ~/.buck/buckd.
#
# On macOS the buck2 daemon can get into a weird non-responsive state. The
# buckd directory is removed regardless of whether kill/clean succeeded.
reset_buck() {
  if buck2 kill; then
    buck2 clean
  fi
  rm -rf ~/.buck/buckd
}

# Run a command, retrying up to two more times when it fails.
#
# Arguments: the command and its arguments, executed as "$@".
# Returns:   0 as soon as one attempt succeeds; otherwise the status of the
#            final retry.
#
# Each retry first sleeps (30s, then 60s), then calls reset_buck, and only
# then re-runs the command. Retries execute inside a subshell.
retry() {
  "$@" && return 0

  local backoff rc=0
  for backoff in 30 60; do
    (sleep "${backoff}" && reset_buck && "$@") && return 0
    rc=$?
  done
  return "${rc}"
}

install_executorch() {
which pip
# Install executorch, this assumes that Executorch is checked out in the
Expand Down Expand Up @@ -40,8 +50,8 @@ install_pip_dependencies() {
}

# Build the executorch runtime (the executor_runner example target) with buck2.
#
# The build is issued once, through retry, because this step is flaky on
# macOS CI. A bare `buck2 build` ahead of it would build the target twice and
# would fail without being retried.
build_executorch_runner_buck2() {
  retry buck2 build //examples/executor_runner:executor_runner
}

build_executorch_runner_cmake() {
Expand All @@ -50,7 +60,9 @@ build_executorch_runner_cmake() {
rm -rf "${CMAKE_OUTPUT_DIR}" && mkdir "${CMAKE_OUTPUT_DIR}"

pushd "${CMAKE_OUTPUT_DIR}" || return
cmake -DBUCK2=buck2 -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" ..
# This command uses buck2 to gather source files, and buck2 can crash
# intermittently on macOS
retry cmake -DBUCK2=buck2 -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" ..
popd || return

if [ "$(uname)" == "Darwin" ]; then
Expand Down
7 changes: 5 additions & 2 deletions examples/custom_ops/test_custom_ops.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

set -e

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/../../.ci/scripts/utils.sh"

test_buck2_custom_op_1() {
local model_name='custom_ops_1'
echo "Exporting ${model_name}.pte"
Expand All @@ -34,7 +37,7 @@ test_cmake_custom_op_1() {
(rm -rf cmake-out \
&& mkdir cmake-out \
&& cd cmake-out \
&& cmake -DBUCK2=buck2 \
&& retry cmake -DBUCK2=buck2 \
-DREGISTER_EXAMPLE_CUSTOM_OP=1 \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)

Expand Down Expand Up @@ -85,7 +88,7 @@ test_cmake_custom_op_2() {
(rm -rf cmake-out \
&& mkdir cmake-out \
&& cd cmake-out \
&& cmake -DBUCK2=buck2 \
&& retry cmake -DBUCK2=buck2 \
-DREGISTER_EXAMPLE_CUSTOM_OP=2 \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
Expand Down
5 changes: 4 additions & 1 deletion examples/quantization/test_quantize.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

set -e

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/../../.ci/scripts/utils.sh"

get_shared_lib_ext() {
UNAME=$(uname)
if [[ $UNAME == "Darwin" ]];
Expand Down Expand Up @@ -47,7 +50,7 @@ test_cmake_quantization() {
(rm -rf cmake-out \
&& mkdir cmake-out \
&& cd cmake-out \
&& cmake -DBUCK2=buck2 \
&& retry cmake -DBUCK2=buck2 \
-DREGISTER_QUANTIZED_OPS=ON \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
Expand Down
9 changes: 6 additions & 3 deletions examples/selective_build/test_selective_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
# 4. (TODO) Select from a serialized model (.pte)
set -e

# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/../../.ci/scripts/utils.sh"

test_buck2_select_all_ops() {
echo "Exporting MobilenetV3"
${PYTHON_EXECUTABLE} -m examples.export.export_example --model_name="mv3"
Expand Down Expand Up @@ -57,7 +60,7 @@ test_cmake_select_all_ops() {
(rm -rf cmake-out \
&& mkdir cmake-out \
&& cd cmake-out \
&& cmake -DBUCK2="$BUCK" \
&& retry cmake -DBUCK2="$BUCK" \
-DBUILD_SELECTIVE_BUILD_TEST=ON \
-DCMAKE_BUILD_TYPE=Release \
-DSELECT_ALL_OPS=ON \
Expand All @@ -81,7 +84,7 @@ test_cmake_select_ops_in_list() {
(rm -rf cmake-out \
&& mkdir cmake-out \
&& cd cmake-out \
&& cmake -DBUCK2="$BUCK" \
&& retry cmake -DBUCK2="$BUCK" \
-DMAX_KERNEL_NUM=16 \
-DBUILD_SELECTIVE_BUILD_TEST=ON \
-DCMAKE_BUILD_TYPE=Release \
Expand All @@ -108,7 +111,7 @@ test_cmake_select_ops_in_yaml() {
(rm -rf cmake-out \
&& mkdir cmake-out \
&& cd cmake-out \
&& cmake -DBUCK2="$BUCK" \
&& retry cmake -DBUCK2="$BUCK" \
-DBUILD_SELECTIVE_BUILD_TEST=ON \
-DCMAKE_BUILD_TYPE=Release \
-DSELECT_OPS_YAML=ON \
Expand Down