
Commit 6bb3801

Merge branch 'main' into phi4-export
2 parents b687fd9 + 17cbef5

35 files changed (+1126, -372 lines)

.ci/docker/requirements-ci.txt

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ parameterized==0.9.0
 
 # Doc build requirements, same as https://github.com/pytorch/pytorch/blob/main/.ci/docker/requirements-docs.txt
 sphinx==5.3.0
+sphinx-reredirects==0.1.4
 sphinx-gallery==0.14.0
 breathe==4.34.0
 exhale==0.2.3

.github/workflows/_android.yml

Lines changed: 4 additions & 0 deletions
@@ -22,6 +22,10 @@ jobs:
       script: |
         set -eux
 
+        # Use sccache for NDK compiler as well
+        export CMAKE_CXX_COMPILER_LAUNCHER=sccache
+        export CMAKE_C_COMPILER_LAUNCHER=sccache
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"

.github/workflows/android-perf.yml

Lines changed: 5 additions & 1 deletion
@@ -353,6 +353,10 @@ jobs:
       script: |
         set -eux
 
+        # Use sccache for NDK compiler as well
+        export CMAKE_CXX_COMPILER_LAUNCHER=sccache
+        export CMAKE_C_COMPILER_LAUNCHER=sccache
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
@@ -392,7 +396,7 @@ jobs:
       fail-fast: false
     with:
       # Due to scheduling a job may be pushed beyond the default 60m threshold
-      timeout: 120
+      timeout: 240
       device-type: android
       runner: linux.2xlarge
       test-infra-ref: ''

.github/workflows/android-release-artifacts.yml

Lines changed: 4 additions & 0 deletions
@@ -60,6 +60,10 @@ jobs:
       script: |
         set -eux
 
+        # Use sccache for NDK compiler as well
+        export CMAKE_CXX_COMPILER_LAUNCHER=sccache
+        export CMAKE_C_COMPILER_LAUNCHER=sccache
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"

backends/arm/test/ops/test_sigmoid.py

Lines changed: 19 additions & 5 deletions
@@ -9,8 +9,10 @@
 
 from typing import Tuple
 
+import pytest
+
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
@@ -63,7 +65,7 @@ def forward(self, x, y):
     def _test_sigmoid_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        (
+        tester = (
             ArmTester(
                 module,
                 example_inputs=test_data,
@@ -77,11 +79,13 @@ def _test_sigmoid_tosa_MI_pipeline(
             .check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
         )
 
+        if conftest.is_option_enabled("tosa_ref_model"):
+            tester.run_method_and_compare_outputs(inputs=test_data)
+
     def _test_sigmoid_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        (
+        tester = (
             ArmTester(
                 module,
                 example_inputs=test_data,
@@ -96,9 +100,11 @@ def _test_sigmoid_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tup
             .check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
         )
 
+        if conftest.is_option_enabled("tosa_ref_model"):
+            tester.run_method_and_compare_outputs(inputs=test_data)
+
     def _test_sigmoid_tosa_ethos_BI_pipeline(
         self,
         compile_spec: list[CompileSpec],
@@ -137,6 +143,7 @@ def _test_sigmoid_tosa_u85_BI_pipeline(
         )
 
     @parameterized.expand(test_data_suite)
+    @pytest.mark.tosa_ref_model
     def test_sigmoid_tosa_MI(
         self,
         test_name: str,
@@ -145,26 +152,33 @@ def test_sigmoid_tosa_MI(
         self._test_sigmoid_tosa_MI_pipeline(self.Sigmoid(), (test_data,))
 
     @parameterized.expand(test_data_suite)
+    @pytest.mark.tosa_ref_model
     def test_sigmoid_tosa_BI(self, test_name: str, test_data: torch.Tensor):
         self._test_sigmoid_tosa_BI_pipeline(self.Sigmoid(), (test_data,))
 
+    @pytest.mark.tosa_ref_model
     def test_add_sigmoid_tosa_MI(self):
         self._test_sigmoid_tosa_MI_pipeline(self.AddSigmoid(), (test_data_suite[0][1],))
 
+    @pytest.mark.tosa_ref_model
     def test_add_sigmoid_tosa_BI(self):
         self._test_sigmoid_tosa_BI_pipeline(self.AddSigmoid(), (test_data_suite[5][1],))
 
+    @pytest.mark.tosa_ref_model
     def test_sigmoid_add_tosa_MI(self):
         self._test_sigmoid_tosa_MI_pipeline(self.SigmoidAdd(), (test_data_suite[0][1],))
 
+    @pytest.mark.tosa_ref_model
     def test_sigmoid_add_tosa_BI(self):
         self._test_sigmoid_tosa_BI_pipeline(self.SigmoidAdd(), (test_data_suite[0][1],))
 
+    @pytest.mark.tosa_ref_model
     def test_sigmoid_add_sigmoid_tosa_MI(self):
         self._test_sigmoid_tosa_MI_pipeline(
             self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])
         )
 
+    @pytest.mark.tosa_ref_model
     def test_sigmoid_add_sigmoid_tosa_BI(self):
         self._test_sigmoid_tosa_BI_pipeline(
             self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])
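
These pipelines now build the ArmTester chain unconditionally but only call run_method_and_compare_outputs() when the "tosa_ref_model" option is enabled, and the tests carry a matching pytest marker. Below is a minimal sketch of how such an option gate could be registered in conftest.py; the command-line flag name and the set-based bookkeeping are illustrative assumptions, not necessarily the exact ExecuTorch wiring.

# Illustrative conftest.py sketch -- flag name and bookkeeping are assumptions.
_enabled_options = set()


def pytest_addoption(parser):
    # Opt-in flag: reference-model comparisons only run when requested.
    parser.addoption("--tosa_ref_model", action="store_true", default=False)


def pytest_configure(config):
    # Register the marker used by the sigmoid tests above.
    config.addinivalue_line(
        "markers",
        "tosa_ref_model: test compares outputs against the TOSA reference model",
    )
    if config.getoption("--tosa_ref_model"):
        _enabled_options.add("tosa_ref_model")


def is_option_enabled(option: str) -> bool:
    # Called by the test pipelines to decide whether to invoke
    # run_method_and_compare_outputs().
    return option in _enabled_options

Under that assumption, a plain pytest run still partitions and lowers the model but skips the TOSA reference-model comparison, and -m "not tosa_ref_model" can deselect the marked tests entirely.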

backends/arm/test/targets.bzl

Lines changed: 5 additions & 2 deletions
@@ -12,8 +12,11 @@ def define_arm_tests():
     test_files.remove("passes/test_ioquantization_pass.py")
 
     # Operators
-    test_files += ["ops/test_linear.py"]
-    test_files += ["ops/test_slice.py"]
+    test_files += [
+        "ops/test_linear.py",
+        "ops/test_slice.py",
+        "ops/test_sigmoid.py",
+    ]
 
     TESTS = {}

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 22 additions & 0 deletions
@@ -260,6 +260,26 @@ vkapi::VulkanImage allocate_image(
     return vkapi::VulkanImage();
   }
 
+  // TODO(ssjia): change to always check that the image extents do not exceed
+  // physical limits. Adding the check now based on `maxImageDimension3D` will
+  // cause some existing models to break. Anecdotally, on Adreno and
+  // SwiftShader devices, using 3D textures that exceed `maxImageDimension3D`
+  // appears to be ok. So we need to figure out if is it undefined behaviour
+  // or if there's a better way to figure out what the limit is. For now, only
+  // check during debug build so that we can detect when exceeding physical
+  // limits could be a potential cause for model outputs to be wrong. In the
+  // meantime, the threshold for using texture storage can be configured at
+  // export time.
+#ifdef VULKAN_DEBUG
+  uint32_t max_extent = storage_type == utils::kTexture3D
+      ? adapter_ptr->max_texture3d_dim()
+      : adapter_ptr->max_texture2d_dim();
+
+  VK_CHECK_COND(
+      image_extents[0] <= max_extent && image_extents[1] <= max_extent &&
+      image_extents[2] <= max_extent);
+#endif
+
   VkSampler sampler = adapter_ptr->sampler_cache().retrieve(sampler_props);
 
   return adapter_ptr->vma().create_image(
@@ -291,6 +311,8 @@ vkapi::VulkanBuffer allocate_buffer(
     return vkapi::VulkanBuffer();
   }
 
+  VK_CHECK_COND(numel <= context_ptr->adapter_ptr()->max_buffer_numel());
+
   return adapter_ptr->vma().create_storage_buffer(
       element_size(dtype) * numel, allocate_memory);
 }

backends/vulkan/runtime/gen_vulkan_spv.py

Lines changed: 2 additions & 0 deletions
@@ -125,6 +125,8 @@ def buffer_gvec_type(dtype: str, n: int) -> str:
 
     if dtype == "float":
        return f"vec{n}"
+    if dtype == "uint":
+        return f"uvec{n}"
     elif dtype == "half":
         return f"f16vec{n}"
     elif dtype == "int":
