Merge branch 'main' into export-D69600543

cmt0 · web-flow · commit 5e830c8bb1cc · 2025-02-20T17:14:21.000-06:00
diff --git a/.ci/scripts/setup-linux.sh b/.ci/scripts/setup-linux.sh
@@ -22,7 +22,7 @@ fi
 # have already been installed, so we use PyTorch build from source here instead
 # of nightly. This allows CI to test against latest commits from PyTorch
 install_executorch "use-pt-pinned-commit"
-build_executorch_runner "${BUILD_TOOL}"
+build_executorch_runner "${BUILD_TOOL}" "${2:-Release}"
 
 if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then
   do_not_use_nightly_on_ci
diff --git a/.ci/scripts/setup-macos.sh b/.ci/scripts/setup-macos.sh
@@ -136,7 +136,7 @@ install_pytorch_and_domains
 # We build PyTorch from source here instead of using nightly. This allows CI to test against
 # the pinned commit from PyTorch
 install_executorch "use-pt-pinned-commit"
-build_executorch_runner "${BUILD_TOOL}"
+build_executorch_runner "${BUILD_TOOL}" "${2:-Release}"
 
 if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then
   do_not_use_nightly_on_ci
diff --git a/.ci/scripts/unittest-linux.sh b/.ci/scripts/unittest-linux.sh
@@ -14,6 +14,14 @@ else
   exit 1
 fi
 
+BUILD_MODE=$2
+if [[ "${BUILD_MODE:-}" =~ ^(Debug|Release)$ ]]; then
+    echo "Running tests in build mode ${BUILD_MODE} ..."
+else
+    echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release."
+    exit 1
+fi
+
 # The generic Linux job chooses to use base env, not the one setup by the image
 eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -25,7 +33,7 @@ source .ci/scripts/setup-vulkan-linux-deps.sh
 PYTHON_EXECUTABLE=python \
 EXECUTORCH_BUILD_PYBIND=ON \
 CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
-.ci/scripts/setup-linux.sh "$BUILD_TOOL"
+.ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE"
 
 # Install llama3_2_vision dependencies.
 PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
diff --git a/.ci/scripts/unittest-macos.sh b/.ci/scripts/unittest-macos.sh
@@ -14,6 +14,14 @@ else
   exit 1
 fi
 
+BUILD_MODE=$2
+if [[ $BUILD_MODE =~ ^(Debug|Release)$ ]]; then
+    echo "Running tests in build mode ${BUILD_MODE} ..."
+else
+    echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release."
+    exit 1
+fi
+
 bash .ci/scripts/setup-conda.sh
 eval "$(conda shell.bash hook)"
 
@@ -27,7 +35,7 @@ PYTHON_EXECUTABLE=python \
 EXECUTORCH_BUILD_PYBIND=ON \
 CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
 ${CONDA_RUN} --no-capture-output \
-.ci/scripts/setup-macos.sh cmake
+.ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}"
 
 # Install llama3_2_vision dependencies.
 PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
@@ -109,7 +109,7 @@ build_executorch_runner_cmake() {
   pushd "${CMAKE_OUTPUT_DIR}" || return
   # This command uses buck2 to gather source files and buck2 could crash flakily
   # on MacOS
-  retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release ..
+  retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
   popd || return
 
   if [ "$(uname)" == "Darwin" ]; then
@@ -124,7 +124,7 @@ build_executorch_runner() {
   if [[ $1 == "buck2" ]]; then
     build_executorch_runner_buck2
   elif [[ $1 == "cmake" ]]; then
-    build_executorch_runner_cmake
+    build_executorch_runner_cmake "$2"
   else
     echo "Invalid build tool $1. Only buck2 and cmake are supported atm"
     exit 1
diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py
@@ -229,21 +229,22 @@ def extract_ios_metric(
 
     elif method == "forward":
         if metric_name == "Clock Monotonic Time, s":
-            benchmark_result["metric"] = (
-                "generate_time(ms)"
-                if "llama" in test_name
-                else "avg_inference_latency(ms)"
-            )
+            benchmark_result["metric"] = "avg_inference_latency(ms)"
             benchmark_result["actualValue"] = metric_value * 1000
 
         elif metric_name == "Memory Peak Physical, kB":
             # NB: Showing the value in mB is friendlier IMO
             benchmark_result["metric"] = "peak_inference_mem_usage(mb)"
             benchmark_result["actualValue"] = metric_value / 1024
 
-    elif method == "generate" and metric_name == "Tokens Per Second, t/s":
-        benchmark_result["metric"] = "token_per_sec"
-        benchmark_result["actualValue"] = metric_value
+    elif method == "generate":
+        if metric_name == "Clock Monotonic Time, s":
+            benchmark_result["metric"] = "generate_time(ms)"
+            benchmark_result["actualValue"] = metric_value * 1000
+
+        elif metric_name == "Tokens Per Second, t/s":
+            benchmark_result["metric"] = "token_per_sec"
+            benchmark_result["actualValue"] = metric_value
 
     return benchmark_result
 
diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml
@@ -7,6 +7,10 @@ on:
         required: true
         type: string
         description: Name of the docker image to use.
+      build-mode:
+        required: true
+        type: string
+        description: Build mode to use, Debug or Release.
       build-tool:
         required: true
         type: string
@@ -30,7 +34,7 @@ jobs:
       timeout: 90
       script: |
         set -eux
-        .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}"
+        .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}"
 
   macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -41,4 +45,4 @@ jobs:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
         set -eux
-        .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}"
+        .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}"
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -367,6 +367,7 @@ jobs:
       id-token: write
       contents: read
     with:
+      build-mode: Debug
       build-tool: cmake
       docker-image: executorch-ubuntu-22.04-clang12
 
@@ -376,6 +377,7 @@ jobs:
       id-token: write
       contents: read
     with:
+      build-mode: Debug
       build-tool: buck2
       docker-image: executorch-ubuntu-22.04-clang12
 
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
@@ -489,3 +489,13 @@ jobs:
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
+
+  unittest-release:
+    uses: ./.github/workflows/_unittest.yml
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      build-mode: Release
+      build-tool: cmake
+      docker-image: executorch-ubuntu-22.04-clang12
diff --git a/examples/models/llama/TARGETS b/examples/models/llama/TARGETS
@@ -95,11 +95,8 @@ runtime.command_alias(
 )
 
 runtime.python_library(
-    name = "export_library",
+    name = "source_transformation",
     srcs = [
-        "export_llama.py",
-        "export_llama_lib.py",
-        "model.py",
         "source_transformation/apply_spin_quant_r1_r2.py",
         "source_transformation/attention.py",
         "source_transformation/lora.py",
@@ -114,6 +111,15 @@ runtime.python_library(
         "source_transformation/vulkan_rope.py",
         "source_transformation/attention_sink.py",
     ],
+)
+
+runtime.python_library(
+    name = "export_library",
+    srcs = [
+        "export_llama.py",
+        "export_llama_lib.py",
+        "model.py",
+    ],
     _is_external_target = True,
     base_module = "executorch.examples.models.llama",
     visibility = [
@@ -123,6 +129,7 @@ runtime.python_library(
         "@EXECUTORCH_CLIENTS",
     ],
     deps = [
+        ":source_transformation",
         "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
         "//caffe2:torch",
         "//executorch/backends/vulkan/_passes:vulkan_passes",
diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -82,7 +82,7 @@ @implementation LLaMATests
         return;
       }
       TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new];
-      [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTMemoryMetric new] ]
+      [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTClockMetric new], [XCTMemoryMetric new] ]
                             block:^{
                               tokensPerSecondMetric.tokenCount = 0;
                               const auto status = runner->generate(
diff --git a/runtime/core/result.h b/runtime/core/result.h
@@ -59,8 +59,13 @@ class Result final {
    * a non-Ok value.
    */
   /* implicit */ Result(Error error)
-      : error_(error == Error::Ok ? Error::Internal : error),
-        hasValue_(false) {}
+      : error_(error == Error::Ok ? Error::Internal : error), hasValue_(false) {
+    if ET_UNLIKELY (error == Error::Ok) {
+      ET_LOG(
+          Debug,
+          "Attempted to create Result from Error::Ok, this has been converted to Error::Internal.");
+    }
+  }
 
   /// Value copy constructor.
   /* implicit */ Result(const T& val) : value_(val), hasValue_(true) {}
diff --git a/runtime/core/test/error_handling_test.cpp b/runtime/core/test/error_handling_test.cpp
@@ -110,6 +110,7 @@ TEST(ErrorHandlingTest, ResultBasic) {
 }
 
 TEST(ErrorHandlingTest, OkErrorNotPossible) {
+  executorch::runtime::runtime_init();
   Result<uint32_t> r(Error::Ok);
   ASSERT_FALSE(r.ok());
   ASSERT_NE(r.error(), Error::Ok);
diff --git a/shim/xplat/executorch/build/runtime_wrapper.bzl b/shim/xplat/executorch/build/runtime_wrapper.bzl
@@ -171,7 +171,7 @@ def _patch_kwargs_common(kwargs):
     # don't pick up unexpected clients while things are still in flux.
     if not kwargs.pop("_is_external_target", False):
         for target in kwargs.get("visibility", []):
-            if not (target.startswith("//executorch") or target.startswith("@")):
+            if not (target.startswith("//executorch") or target.startswith("//pytorch/tokenizers") or target.startswith("@")):
                 fail("Please manage all external visibility using the " +
                      "EXECUTORCH_CLIENTS list in " +
                      "//executorch/build/fb/clients.bzl. " +
diff --git a/test/utils/targets.bzl b/test/utils/targets.bzl
@@ -21,6 +21,7 @@ def define_common_targets():
             ],
             visibility = [
                 "//executorch/...",
+                "//pytorch/tokenizers/...",
                 "@EXECUTORCH_CLIENTS",
             ],
             deps = [

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -82,7 +82,7 @@ @implementation LLaMATests` |
| `82` | `82` | `return;` |
| `83` | `83` | `}` |
| `84` | `84` | `TokensPerSecondMetric *tokensPerSecondMetric = [TokensPerSecondMetric new];` |
| `85` |  | `- [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTMemoryMetric new] ]` |
|  | `85` | `+ [testCase measureWithMetrics:@[ tokensPerSecondMetric, [XCTClockMetric new], [XCTMemoryMetric new] ]` |
| `86` | `86` | `block:^{` |
| `87` | `87` | `tokensPerSecondMetric.tokenCount = 0;` |
| `88` | `88` | `const auto status = runner->generate(` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -110,6 +110,7 @@ TEST(ErrorHandlingTest, ResultBasic) {` |
| `110` | `110` | `}` |
| `111` | `111` |  |
| `112` | `112` | `TEST(ErrorHandlingTest, OkErrorNotPossible) {` |
|  | `113` | `+ executorch::runtime::runtime_init();` |
| `113` | `114` | `Result<uint32_t> r(Error::Ok);` |
| `114` | `115` | `ASSERT_FALSE(r.ok());` |
| `115` | `116` | `ASSERT_NE(r.error(), Error::Ok);` |