
Commit 0b83772

Update on "[ExecuTorch] Arm Ethos: Buckify Linear operator tests"
As title.

Differential Revision: [D70018299](https://our.internmc.facebook.com/intern/diff/D70018299/)

[ghstack-poisoned]
2 parents: 0106f26 + 5bd93d5

File tree: 277 files changed, +6401 −1916 lines


.ci/scripts/gather_test_models.py
Lines changed: 5 additions & 1 deletion

@@ -104,8 +104,12 @@ def model_should_run_on_target_os(model: str, target_os: str) -> bool:
     For example, a big model can be disabled in macos due to the limited macos resources.
     """
     if target_os == "macos":
+        # Disabled in macos due to limited resources, and should stay that way even if
+        # we otherwise re-enable.
         return model not in ["llava"]
-    return True
+    # Disabled globally because we have test-llava-runner-linux that does a more
+    # comprehensive E2E test of llava.
+    return model not in ["llava"]


 def export_models_for_ci() -> dict[str, dict]:
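For reference, a minimal sketch of the predicate's behavior after this change (condensed from the diff above; the model names in the asserts are illustrative, and the duplicated deny-list is intentional so the two branches can diverge again later):

    def model_should_run_on_target_os(model: str, target_os: str) -> bool:
        # llava stays disabled on macos (limited resources) and is now also
        # skipped elsewhere, since test-llava-runner-linux covers it end to end.
        if target_os == "macos":
            return model not in ["llava"]
        return model not in ["llava"]

    assert not model_should_run_on_target_os("llava", "linux")
    assert model_should_run_on_target_os("mv3", "macos")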

.ci/scripts/test_model.sh
Lines changed: 11 additions & 1 deletion

@@ -91,7 +91,17 @@ test_model() {
     # Install requirements for llama vision.
     bash examples/models/llama3_2_vision/install_requirements.sh
   fi
-  # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
+  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+    # Install requirements for export_llama
+    bash examples/models/llama/install_requirements.sh
+    # Test export_llama script: python3 -m examples.models.llama.export_llama.
+    # Use Llama random checkpoint with Qwen 2.5 1.5b model configuration.
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/qwen2_5/1_5b_config.json
+    rm "./${MODEL_NAME}.pte"
+    return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
+  fi
+
+  # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
   run_portable_executor_runner
 }
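The skip at the end of the new branch comes down to Qwen 2.5 putting bias terms on its linear projections, which the portable kernels don't cover. A hypothetical sketch of such a biased projection (the hidden size and layer names are assumptions for illustration, not read from 1_5b_config.json):

    import torch
    import torch.nn as nn

    # Llama-family models use bias-free projections; Qwen 2.5-style attention
    # projections carry a bias, so running them needs biased-linear support.
    hidden = 1536  # assumed hidden size for a 1.5b-class model
    q_proj = nn.Linear(hidden, hidden, bias=True)
    x = torch.randn(1, 8, hidden)
    print(q_proj(x).shape)  # torch.Size([1, 8, 1536])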

.ci/scripts/unittest-linux.sh
Lines changed: 15 additions & 9 deletions

@@ -27,20 +27,26 @@ eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"

-# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
-source .ci/scripts/setup-vulkan-linux-deps.sh
+if [[ "$BUILD_TOOL" == "cmake" ]]; then
+  # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+  source .ci/scripts/setup-vulkan-linux-deps.sh

-PYTHON_EXECUTABLE=python \
-EXECUTORCH_BUILD_PYBIND=ON \
-CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
-.ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE"
+  PYTHON_EXECUTABLE=python \
+  EXECUTORCH_BUILD_PYBIND=ON \
+  CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
+  .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE"

-# Install llama3_2_vision dependencies.
-PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
+  # Install llama3_2_vision dependencies.
+  PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh

-if [[ "$BUILD_TOOL" == "cmake" ]]; then
   .ci/scripts/unittest-linux-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then
+  # Removing this breaks sccache in the Buck build, apparently
+  # because TMPDIR gets messed up? Please feel free to fix this and
+  # speed up this CI job!
+  PYTHON_EXECUTABLE=python \
+  .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE"
+
   .ci/scripts/unittest-buck2.sh
 else
   echo "Unknown build tool $BUILD_TOOL"

.ci/scripts/unittest-macos.sh
Lines changed: 11 additions & 11 deletions

@@ -30,19 +30,19 @@ export TMP_DIR=$(mktemp -d)
 export PATH="${TMP_DIR}:$PATH"
 trap 'rm -rfv ${TMP_DIR}' EXIT

-# Setup MacOS dependencies as there is no Docker support on MacOS atm
-PYTHON_EXECUTABLE=python \
-EXECUTORCH_BUILD_PYBIND=ON \
-CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
-${CONDA_RUN} --no-capture-output \
-.ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}"
+if [[ "$BUILD_TOOL" == "cmake" ]]; then
+  # Setup MacOS dependencies as there is no Docker support on MacOS atm
+  PYTHON_EXECUTABLE=python \
+  EXECUTORCH_BUILD_PYBIND=ON \
+  CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
+  ${CONDA_RUN} --no-capture-output \
+  .ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}"

-# Install llama3_2_vision dependencies.
-PYTHON_EXECUTABLE=python \
-${CONDA_RUN} --no-capture-output \
-./examples/models/llama3_2_vision/install_requirements.sh
+  # Install llama3_2_vision dependencies.
+  PYTHON_EXECUTABLE=python \
+  ${CONDA_RUN} --no-capture-output \
+  ./examples/models/llama3_2_vision/install_requirements.sh

-if [[ "$BUILD_TOOL" == "cmake" ]]; then
   .ci/scripts/unittest-macos-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then
   .ci/scripts/unittest-buck2.sh

.github/workflows/pull.yml
Lines changed: 25 additions & 1 deletion

@@ -56,6 +56,30 @@ jobs:
       # Build and test ExecuTorch with the add model on portable backend.
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable"

+  test-pip-install-editable-mode-linux:
+    name: test-pip-install-editable-mode-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        # Debug
+        which pip
+        PYTHON_EXECUTABLE=python bash ./install_executorch.sh --editable --pybind xnnpack --use-pt-pinned-commit
+        # Try to import extension library
+        python -c "from executorch.extension.llm.custom_ops import custom_ops"
+
   test-models-linux:
     name: test-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

@@ -480,7 +504,7 @@ jobs:

       # Setup install_requirements for llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-
+
       # Test static llama weight sharing and accuracy
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh


.github/workflows/trunk.yml
Lines changed: 25 additions & 0 deletions

@@ -36,6 +36,31 @@ jobs:

       PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"

+  test-pip-install-editable-mode-macos:
+    name: test-pip-install-editable-mode-macos
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        # Debug
+        which pip
+        bash .ci/scripts/setup-conda.sh
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash ./install_executorch.sh --editable --pybind xnnpack
+        # Try to import extension library
+        python -c "from executorch.extension.llm.custom_ops import custom_ops"
+
   test-models-macos:
     name: test-models-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

.gitmodules
Lines changed: 3 additions & 0 deletions

@@ -70,3 +70,6 @@
 [submodule "third-party/pocketfft"]
 	path = third-party/pocketfft
 	url = https://github.com/mreineck/pocketfft
+[submodule "shim"]
+	path = shim
+	url = https://github.com/facebook/buck2-shims-meta

CODEOWNERS
Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@
 /examples/llm_manual @larryliu0820
 /examples/llm_pte_finetuning @JacobSzwejbka
 /examples/mediatek @cccclai
-/examples/models @lucylq
+/examples/models @lucylq @jackzhxng
 /examples/portable @larryliu0820 @manuelcandales
 /examples/qualcomm @cccclai
 /examples/selective_build @lucylq @larryliu0820 @JacobSzwejbka

backends/arm/arm_vela.py
Lines changed: 12 additions & 1 deletion

@@ -12,7 +12,13 @@
 from typing import List

 import numpy as np
-from ethosu.vela import vela  # type: ignore
+
+try:
+    from ethosu.vela import vela  # type: ignore
+
+    has_vela = True
+except ImportError:
+    has_vela = False


 # Pack either input or output tensor block, compose the related arrays into

@@ -45,6 +51,11 @@ def vela_compile(
     """
     Compile a TOSA graph to a binary stream for ArmBackendEthosU using Vela.
     """
+    if not has_vela:
+        raise RuntimeError(
+            "ethos-u-vela pip package couldn't be imported. Make sure it's installed!"
+        )
+
     with tempfile.TemporaryDirectory() as tmpdir:
         tosaname = "out.tosa"
         tosa_path = os.path.join(tmpdir, tosaname)
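This change follows the standard optional-dependency pattern: attempt the import once at module load, remember the outcome in a flag, and raise an actionable error only when the feature is actually used. A generic sketch of the same pattern (module and function names here are hypothetical):

    try:
        import optional_compiler  # hypothetical optional dependency
        has_compiler = True
    except ImportError:
        has_compiler = False

    def compile_graph(graph_bytes: bytes) -> bytes:
        # Fail at call time, not import time, so the rest of the package
        # stays importable without the optional dependency installed.
        if not has_compiler:
            raise RuntimeError("optional_compiler isn't installed!")
        return optional_compiler.compile(graph_bytes)

Deferring the error keeps unrelated tests and tooling working while still giving a clear message the moment Vela is actually needed.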

backends/arm/operator_support/__init__.py
Lines changed: 0 additions & 1 deletion

@@ -6,7 +6,6 @@
 # pyre-unsafe

 from . import (  # noqa
-    bitwise_support,
     convolution_support,
     pool_2d_support,
     reduce_sum_support,

backends/arm/operator_support/bitwise_support.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 35 additions & 6 deletions

@@ -11,13 +11,13 @@
 from typing import final, Optional, Sequence, Type

 import torch
-
 import torch.fx as fx
+
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 from executorch.backends.arm._passes.fuse_quantized_activation_pass import (
     FuseQuantizedActivationPass,
 )
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.fx.passes.operator_support import any_chain, chain, OperatorSupportBase
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions

@@ -90,6 +90,7 @@ def tosa_support_factory(
     if not tosa_spec.support_float():
         negative_checks.append(NeedsDecompositionCheck())
         negative_checks.append(CheckProperQuantization())
+        negative_checks.append(EthosU55NotSupported(tosa_spec))
     return chain(
         any_chain(
             BaseTOSASupportList(),

@@ -111,6 +112,9 @@ def is_node_supported(
         supported = node.op == "call_function" and node.target in [
             exir_ops.edge.aten.abs.default,
             exir_ops.edge.aten.add.Tensor,
+            exir_ops.edge.aten.bitwise_and.Tensor,
+            exir_ops.edge.aten.bitwise_or.Tensor,
+            exir_ops.edge.aten.bitwise_xor.Tensor,
             exir_ops.edge.aten.expand_copy.default,
             exir_ops.edge.aten.cat.default,
             exir_ops.edge.aten.clamp.default,

@@ -170,6 +174,31 @@ def is_node_supported(
         return supported


+class EthosU55NotSupported(OperatorSupportBase):
+    """
+    Certain operators are not supported on U55. These are listed in `unsupported_ops`
+    in is_node_supported().
+    """
+
+    def __init__(self, tosa_spec: TosaSpecification):
+        self.tosa_spec = tosa_spec
+
+    def is_node_supported(
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
+        if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
+            unsupported_ops = [
+                exir_ops.edge.aten.bitwise_and.Tensor,
+                exir_ops.edge.aten.bitwise_or.Tensor,
+                exir_ops.edge.aten.bitwise_xor.Tensor,
+            ]
+
+            if node.target in unsupported_ops:
+                return False
+
+        return True
+
+
 class NeedsDecompositionCheck(OperatorSupportBase):
     """
     Targeted operators need to be decomposed prior to quantization in order to get a pair of q-dq-nodes surrounding

@@ -310,11 +339,11 @@ def is_node_supported(
         if not input_quantized:
             return False

-        output_quantized = output_quantized or all(
-            (output_node.target == self.q_op)
-            or (not get_first_fake_tensor(output_node).dtype.is_floating_point)
-            for output_node in node.users
+        all_q_users = all(
+            (output_node.target == self.q_op) for output_node in node.users
         )
+        is_floating_point = get_first_fake_tensor(node).dtype.is_floating_point
+        output_quantized = output_quantized or all_q_users or not is_floating_point

         if not output_quantized:
             return False
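EthosU55NotSupported is a negative check: it only ever vetoes nodes, and tosa_support_factory composes it with the allow-list via chain/any_chain. A reduced, self-contained sketch of how such a deny-list check behaves on an FX graph (toy torch ops stand in for the exir_ops deny-list above):

    import torch
    import torch.fx as fx
    from torch.fx.passes.operator_support import OperatorSupportBase

    class DenyListSupport(OperatorSupportBase):
        """Report False for call_function nodes whose target is denied."""

        def __init__(self, denied):
            self.denied = set(denied)

        def is_node_supported(self, submodules, node: fx.Node) -> bool:
            return not (node.op == "call_function" and node.target in self.denied)

    def f(a, b):
        return torch.bitwise_and(a, b) + b

    gm = fx.symbolic_trace(f)
    check = DenyListSupport({torch.bitwise_and})
    mods = dict(gm.named_modules())
    for node in gm.graph.nodes:
        if node.op == "call_function":
            # bitwise_and is vetoed; the trailing add passes the check.
            print(node.target, check.is_node_supported(mods, node))

This mirrors the diff's design: the bitwise ops join the general TOSA allow-list, while the U55-specific veto keeps them off that one target.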

backends/arm/operators/op_avg_pool2d.py
Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ def _build_generic_avgpool2d(
     output: TosaArg,
     input_zp: int,
     output_zp: int,
-    accumulator_type,
+    accumulator_type: ts.DType,
 ) -> None:
     input_tensor = inputs[0]

backends/arm/operators/op_conv2d.py
Lines changed: 8 additions & 6 deletions

@@ -22,8 +22,6 @@
 from executorch.backends.arm.tosa_quant_utils import build_rescale_conv_output
 from executorch.backends.arm.tosa_utils import build_reshape, tosa_shape

-from serializer.tosa_serializer import TosaOp
-

 @register_node_visitor
 class Conv2dVisitor(NodeVisitor):

@@ -36,8 +34,12 @@ def __init__(self, *args):
     # `(input + 2 * pad - dilation * (weight - 1) - 1) / stride`
     # must be an integer, but tosa currently strictly require this property.
     # This function adjusts the pad value to meet the requirement.
-    def adjust_pad_if_needed(self, input, weight, stride, pad, dilation):
-        mod_remainder = (input + 2 * pad - dilation * (weight - 1) - 1) % stride
+    def adjust_pad_if_needed(
+        self, input_size: int, input_weight: int, stride: int, pad: int, dilation: int
+    ) -> int:
+        mod_remainder = (
+            input_size + 2 * pad - dilation * (input_weight - 1) - 1
+        ) % stride

         # No need to adjust
         if mod_remainder == 0:

@@ -143,11 +145,11 @@ def define_node(
             build_reshape(
                 tosa_graph, weight.name, weight_post_shape, weight_reshaped.name
             )
-            tosa_op = TosaOp.Op().DEPTHWISE_CONV2D
+            tosa_op = ts.TosaOp.Op().DEPTHWISE_CONV2D
             weight_name = weight_reshaped.name
         else:
             """Regular convolution case"""
-            tosa_op = TosaOp.Op().CONV2D
+            tosa_op = ts.TosaOp.Op().CONV2D
             weight_name = weight.name

         tosa_graph.addOperator(
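The adjust_pad_if_needed refactor only adds names and type annotations; the arithmetic is unchanged. A quick standalone check of that arithmetic (the remainder computation is taken from the diff; returning pad minus the remainder is an assumption based on the function's name and the surrounding comment):

    def adjust_pad_if_needed(
        input_size: int, weight_size: int, stride: int, pad: int, dilation: int
    ) -> int:
        # TOSA requires (input + 2*pad - dilation*(weight-1) - 1) / stride to
        # be an integer; when it isn't, shave the remainder off the pad.
        remainder = (input_size + 2 * pad - dilation * (weight_size - 1) - 1) % stride
        if remainder == 0:
            return pad
        return pad - remainder

    # 7x7 input, 3x3 kernel, stride 2, pad 1: (7 + 2 - 2 - 1) % 2 == 0, pad stays 1.
    assert adjust_pad_if_needed(7, 3, 2, 1, 1) == 1
    # 8x8 input, same kernel: (8 + 2 - 2 - 1) % 2 == 1, so pad drops to 0.
    assert adjust_pad_if_needed(8, 3, 2, 1, 1) == 0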
