Skip to content

Commit 213d8c4

Browse files
committed
shorten the tested llava output prefix, since the output changed slightly due to changed numerics from "Remove ExecuTorch copy of Vectorized"
All uses are outside ExecuTorch core, so we can just use ATen Vectorized. Differential Revision: [D66396016](https://our.internmc.facebook.com/intern/diff/D66396016/) [ghstack-poisoned]
2 parents b612e5b + 2ea3368 commit 213d8c4

File tree

247 files changed

+8606
-2061
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

247 files changed

+8606
-2061
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ set_up_aot() {
3232
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
3333
-DEXECUTORCH_BUILD_DEVTOOLS=ON \
3434
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
35+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
36+
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
3537
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3638
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
3739
-DPYTHON_EXECUTABLE=python3

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ cmake -DPYTHON_EXECUTABLE=python \
3030
-DCMAKE_BUILD_TYPE=Release \
3131
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
3232
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
33+
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
3334
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3435
-DEXECUTORCH_BUILD_XNNPACK=OFF \
3536
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
3737
-DEXECUTORCH_ENABLE_LOGGING=ON \
3838
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3939
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
40+
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
4041
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
4142
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
4243
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
@@ -146,7 +147,7 @@ run_and_verify() {
146147

147148
# verify result.txt
148149
RESULT=$(cat result.txt)
149-
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. "
150+
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with"
150151

151152
if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
152153
echo "Expected result prefix: ${EXPECTED_PREFIX}"

.ci/scripts/test_phi_3_mini.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ cmake_install_executorch_libraries() {
2727
-DEXECUTORCH_ENABLE_LOGGING=1 \
2828
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
2929
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
30+
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
3031
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3132
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3233
-DEXECUTORCH_BUILD_XNNPACK=ON \

.ci/scripts/unittest-buck2.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ buck2 query "//backends/apple/... + //backends/example/... + \
1515
//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
1616
//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
1717

18-
UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
18+
UNBUILDABLE_OPTIMIZED_OPS_REGEX="_elu|gelu|fft|log_softmax"
1919
BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
2020

2121
# TODO: build prim_ops_test_cpp again once supported_features works in
@@ -24,6 +24,8 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
2424
# TODO: expand the covered scope of Buck targets.
2525
# //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
2626
# //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
27-
buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
28-
$BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
29-
//runtime/executor: //runtime/kernel/... //runtime/platform/...
27+
for op in "build" "test"; do
28+
buck2 $op $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
29+
$BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
30+
//runtime/executor: //runtime/kernel/... //runtime/platform/...
31+
done

.github/workflows/trunk.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,7 @@ jobs:
552552
-DEXECUTORCH_ENABLE_LOGGING=1 \
553553
-DCMAKE_BUILD_TYPE=Release \
554554
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
555+
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
555556
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
556557
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
557558
-DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -686,3 +687,32 @@ jobs:
686687
build-mode: Release
687688
build-tool: cmake
688689
docker-image: executorch-ubuntu-22.04-clang12
690+
691+
unittest-nxp-neutron:
692+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
693+
permissions:
694+
id-token: write
695+
contents: read
696+
with:
697+
runner: linux.2xlarge
698+
docker-image: executorch-ubuntu-22.04-clang12
699+
submodules: 'recursive'
700+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
701+
timeout: 90
702+
script: |
703+
set -eux
704+
705+
# The generic Linux job chooses to use base env, not the one setup by the image
706+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
707+
conda activate "${CONDA_ENV}"
708+
709+
# Build and install Executorch
710+
PYTHON_EXECUTABLE=python \
711+
CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
712+
.ci/scripts/setup-linux.sh --build-tool "cmake"
713+
714+
# Install test requirements
715+
pip install -r backends/nxp/requirements-tests.txt
716+
717+
# Run pytest
718+
PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh

.lintrunner.toml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,10 @@ exclude_patterns = [
271271
'examples/**',
272272
'exir/verification/bindings.cpp',
273273
'extension/**',
274+
# Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
275+
'kernels/portable/cpu/util/elementwise_util.h',
276+
'kernels/portable/cpu/util/math_util.h',
277+
'kernels/portable/cpu/util/vectorized_math.h',
274278
'kernels/optimized/**',
275279
'runtime/core/exec_aten/**',
276280
# Want to be able to keep c10 in sync with PyTorch core.
@@ -386,15 +390,9 @@ exclude_patterns = [
386390
"third-party/**",
387391
# TODO: remove exceptions as we migrate
388392
# backends
389-
"backends/vulkan/quantizer/**",
390-
"backends/vulkan/test/**",
391-
"backends/xnnpack/quantizer/**",
392-
"backends/xnnpack/test/**",
393-
"exir/tests/test_passes.py",
394-
"extension/llm/export/builder.py",
395-
"extension/llm/export/quantizer_lib.py",
396393
"exir/tests/test_memory_planning.py",
397394
"exir/backend/test/demos/test_xnnpack_qnnpack.py",
395+
"backends/xnnpack/test/test_xnnpack_utils.py",
398396
]
399397

400398
command = [

CMakeLists.txt

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -152,37 +152,11 @@ else()
152152
endif()
153153

154154
if(EXECUTORCH_BUILD_TESTS)
155-
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
156155
include(CTest)
157156
endif()
158157

159158
add_subdirectory(third-party)
160159

161-
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
162-
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
163-
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
164-
set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
165-
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
166-
endif()
167-
168-
if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
169-
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
170-
endif()
171-
172-
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
173-
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
174-
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
175-
endif()
176-
177-
if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
178-
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
179-
set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
180-
endif()
181-
182-
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
183-
set(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
184-
endif()
185-
186160
if(NOT DEFINED FXDIV_SOURCE_DIR)
187161
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
188162
${CMAKE_POSITION_INDEPENDENT_CODE}
@@ -336,7 +310,7 @@ if(EXECUTORCH_USE_CPP_CODE_COVERAGE)
336310
" -fprofile-instr-generate -fcoverage-mapping"
337311
)
338312
else()
339-
message(ERROR
313+
message(FATAL_ERROR
340314
"Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported"
341315
)
342316
endif()
@@ -456,14 +430,6 @@ endif()
456430

457431
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
458432

459-
#
460-
# gflags: Commandline flag host library.
461-
#
462-
463-
if(EXECUTORCH_BUILD_GFLAGS)
464-
add_subdirectory(third-party/gflags)
465-
endif()
466-
467433
# Install `executorch` library as well as `executorch-config.cmake` under
468434
# ${CMAKE_INSTALL_PREFIX}/
469435
install(

backends/apple/coreml/scripts/build_tests.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ cmake "$EXECUTORCH_ROOT_PATH" -B"$CMAKE_EXECUTORCH_BUILD_DIR_PATH" \
3333
-DPLATFORM=MAC_UNIVERSAL \
3434
-DDEPLOYMENT_TARGET=13.0 \
3535
-DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
36-
-DEXECUTORCH_BUILD_XNNPACK=OFF \
37-
-DEXECUTORCH_BUILD_GFLAGS=OFF
36+
-DEXECUTORCH_BUILD_XNNPACK=OFF
3837

3938
cmake --build "$CMAKE_EXECUTORCH_BUILD_DIR_PATH" -j9 -t executorch
4039

backends/arm/_passes/arm_pass_manager.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@
6262
UnsqueezeScalarPlaceholdersPass,
6363
)
6464

65-
from executorch.backends.arm.tosa_specification import TosaSpecification
65+
from executorch.backends.arm.tosa_specification import (
66+
TosaLoweringContext,
67+
TosaSpecification,
68+
)
6669
from executorch.backends.transforms.decompose_sdpa import (
6770
DecomposeScaledDotProductAttention,
6871
)
@@ -80,7 +83,8 @@ def __init__(self, tosa_spec: TosaSpecification) -> None:
8083
super().__init__()
8184

8285
def _transform(self, graph_module: GraphModule):
83-
return self(graph_module).graph_module
86+
with TosaLoweringContext(self.tosa_spec):
87+
return self(graph_module).graph_module
8488

8589
def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
8690
self.add_pass(FuseQuantizedActivationPass())

backends/arm/_passes/scalars_to_attribute_pass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
1313

1414
from executorch.exir.pass_base import ExportPass, PassResult
15-
from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix
1615
from torch.fx import GraphModule, Node
16+
from torchao.quantization.pt2e.utils import get_new_attr_name_with_prefix
1717

1818

1919
class ScalarsToAttributePass(ExportPass):

backends/arm/operators/op_abs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def define_node(
4444
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
4545

4646
validate_num_inputs(self.target, inputs, 1)
47-
validate_same_dtype(self.target, [*inputs, output])
47+
validate_same_dtype(self.target, [*inputs, output], ts)
4848

4949
# Handle int8 (quantized) and int32
5050
if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
@@ -106,7 +106,7 @@ def define_node(
106106
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
107107

108108
validate_num_inputs(self.target, inputs, 1)
109-
validate_same_dtype(self.target, [*inputs, output])
109+
validate_same_dtype(self.target, [*inputs, output], ts)
110110

111111
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
112112
# Call the inherited define_node for handling integers
@@ -153,7 +153,7 @@ def define_node(
153153
import serializer.tosa_serializer as ts # type: ignore
154154

155155
validate_num_inputs(self.target, inputs, 1)
156-
validate_same_dtype(self.target, [*inputs, output])
156+
validate_same_dtype(self.target, [*inputs, output], ts)
157157

158158
# Handle int8 (quantized) and int32
159159
if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
@@ -216,7 +216,7 @@ def define_node(
216216
import serializer.tosa_serializer as ts # type: ignore
217217

218218
validate_num_inputs(self.target, inputs, 1)
219-
validate_same_dtype(self.target, [*inputs, output])
219+
validate_same_dtype(self.target, [*inputs, output], ts)
220220

221221
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
222222
# Call the inherited define_node for handling integers

backends/arm/operators/op_add.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def define_node(
4545
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
4646

4747
validate_num_inputs(self.target, inputs, 2)
48-
validate_same_dtype(self.target, [*inputs, output])
48+
validate_same_dtype(self.target, [*inputs, output], ts)
4949

5050
# Handle int8 (quantized) and int32
5151
supported_dtypes = [ts.DType.INT8, ts.DType.INT32]
@@ -118,7 +118,7 @@ def define_node(
118118
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
119119

120120
validate_num_inputs(self.target, inputs, 2)
121-
validate_same_dtype(self.target, [*inputs, output])
121+
validate_same_dtype(self.target, [*inputs, output], ts)
122122

123123
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
124124
# Call the inherited define_node for handling integers
@@ -163,7 +163,7 @@ def define_node(
163163
import serializer.tosa_serializer as ts # type: ignore
164164

165165
validate_num_inputs(self.target, inputs, 2)
166-
validate_same_dtype(self.target, [*inputs, output])
166+
validate_same_dtype(self.target, [*inputs, output], ts)
167167

168168
# Handle int8 (quantized) and int32
169169
supported_dtypes = [ts.DType.INT8, ts.DType.INT32]
@@ -226,7 +226,7 @@ def define_node(
226226
import serializer.tosa_serializer as ts # type: ignore
227227

228228
validate_num_inputs(self.target, inputs, 2)
229-
validate_same_dtype(self.target, [*inputs, output])
229+
validate_same_dtype(self.target, [*inputs, output], ts)
230230

231231
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
232232
# Call the inherited define_node for handling integers

backends/arm/operators/op_amax.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def define_node(
3636
import tosa_tools.v0_80.serializer.tosa_serializer as ts
3737

3838
validate_num_inputs(self.target, inputs, 3)
39-
validate_same_dtype(self.target, [inputs[0], output])
39+
validate_same_dtype(self.target, [inputs[0], output], ts)
4040

4141
input = inputs[0]
4242
dim = inputs[1].number
@@ -79,7 +79,7 @@ def define_node(
7979
import serializer.tosa_serializer as ts
8080

8181
validate_num_inputs(self.target, inputs, 3)
82-
validate_same_dtype(self.target, [inputs[0], output])
82+
validate_same_dtype(self.target, [inputs[0], output], ts)
8383

8484
input = inputs[0]
8585
dim = inputs[1].number

backends/arm/operators/op_amin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def define_node(
3636
import tosa_tools.v0_80.serializer.tosa_serializer as ts
3737

3838
validate_num_inputs(self.target, inputs, 3)
39-
validate_same_dtype(self.target, [inputs[0], output])
39+
validate_same_dtype(self.target, [inputs[0], output], ts)
4040

4141
input = inputs[0]
4242
dim = inputs[1].number
@@ -79,7 +79,7 @@ def define_node(
7979
import serializer.tosa_serializer as ts
8080

8181
validate_num_inputs(self.target, inputs, 3)
82-
validate_same_dtype(self.target, [inputs[0], output])
82+
validate_same_dtype(self.target, [inputs[0], output], ts)
8383

8484
input = inputs[0]
8585
dim = inputs[1].number

backends/arm/operators/op_any.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def define_node(
3535
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
3636

3737
validate_num_inputs(self.target, inputs, 3)
38-
validate_same_dtype(self.target, [inputs[0], output])
38+
validate_same_dtype(self.target, [inputs[0], output], ts)
3939

4040
if not (inputs[0].dtype == ts.DType.BOOL):
4141
raise ValueError("All inputs need to be BOOL." f"Got {inputs[0].dtype=}")
@@ -72,7 +72,7 @@ def define_node(
7272
import serializer.tosa_serializer as ts
7373

7474
validate_num_inputs(self.target, inputs, 3)
75-
validate_same_dtype(self.target, [inputs[0], output])
75+
validate_same_dtype(self.target, [inputs[0], output], ts)
7676

7777
if not (inputs[0].dtype == ts.DType.BOOL):
7878
raise ValueError("All inputs need to be BOOL." f"Got {inputs[0].dtype=}")

backends/arm/operators/op_avg_pool2d.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def define_node(
105105
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
106106

107107
validate_num_inputs(self.target, inputs, [3, 4, 6])
108-
validate_same_dtype(self.target, [inputs[0], output])
108+
validate_same_dtype(self.target, [inputs[0], output], ts)
109109

110110
supported_dtypes = [ts.DType.INT8]
111111
if inputs[0].dtype not in supported_dtypes:
@@ -145,7 +145,7 @@ def define_node(
145145
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
146146

147147
validate_num_inputs(self.target, inputs, [3, 4, 6])
148-
validate_same_dtype(self.target, [inputs[0], output])
148+
validate_same_dtype(self.target, [inputs[0], output], ts)
149149

150150
supported_dtypes = [ts.DType.INT8, ts.DType.FP32]
151151
if inputs[0].dtype not in supported_dtypes:
@@ -252,7 +252,7 @@ def define_node(
252252
import serializer.tosa_serializer as ts # type: ignore
253253

254254
validate_num_inputs(self.target, inputs, [3, 4, 6])
255-
validate_same_dtype(self.target, [inputs[0], output])
255+
validate_same_dtype(self.target, [inputs[0], output], ts)
256256

257257
supported_dtypes = [ts.DType.INT8]
258258
if inputs[0].dtype not in supported_dtypes:
@@ -295,7 +295,7 @@ def define_node(
295295
import serializer.tosa_serializer as ts # type: ignore
296296

297297
validate_num_inputs(self.target, inputs, [3, 4, 6])
298-
validate_same_dtype(self.target, [inputs[0], output])
298+
validate_same_dtype(self.target, [inputs[0], output], ts)
299299

300300
supported_dtypes = [ts.DType.INT8, ts.DType.FP32]
301301
if inputs[0].dtype not in supported_dtypes:

0 commit comments

Comments (0)