pytorch
diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 0 deletions
diff --git a/.github/workflows/_android.yml b/.github/workflows/_android.yml
Lines changed: 4 additions & 0 deletions
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
Lines changed: 5 additions & 1 deletion
diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml
Lines changed: 4 additions & 0 deletions
diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
Lines changed: 22 additions & 0 deletions
diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py
Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,7 @@ parameterized==0.9.0
 
 # Doc build requirements, same as https://github.com/pytorch/pytorch/blob/main/.ci/docker/requirements-docs.txt
 sphinx==5.3.0
+sphinx-reredirects==0.1.4
 sphinx-gallery==0.14.0
 breathe==4.34.0
 exhale==0.2.3
 
@@ -22,6 +22,10 @@ jobs:
       script: |
         set -eux
 
+        # Use sccache for NDK compiler as well
+        export CMAKE_CXX_COMPILER_LAUNCHER=sccache
+        export CMAKE_C_COMPILER_LAUNCHER=sccache
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
 
@@ -353,6 +353,10 @@ jobs:
       script: |
         set -eux
 
+        # Use sccache for NDK compiler as well
+        export CMAKE_CXX_COMPILER_LAUNCHER=sccache
+        export CMAKE_C_COMPILER_LAUNCHER=sccache
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
@@ -392,7 +396,7 @@ jobs:
       fail-fast: false
     with:
       # Due to scheduling a job may be pushed beyond the default 60m threshold
-      timeout: 120
+      timeout: 240
       device-type: android
       runner: linux.2xlarge
       test-infra-ref: ''
 
@@ -60,6 +60,10 @@ jobs:
       script: |
         set -eux
 
+        # Use sccache for NDK compiler as well
+        export CMAKE_CXX_COMPILER_LAUNCHER=sccache
+        export CMAKE_C_COMPILER_LAUNCHER=sccache
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
 
@@ -260,6 +260,26 @@ vkapi::VulkanImage allocate_image(
       return vkapi::VulkanImage();
   }
 
+    // TODO(ssjia): change to always check that the image extents do not exceed
+    // physical limits. Adding the check now based on `maxImageDimension3D` will
+    // cause some existing models to break. Anecdotally, on Adreno and
+    // SwiftShader devices, using 3D textures that exceed `maxImageDimension3D`
+    // appears to be ok. So we need to figure out if it is undefined behaviour
+    // or if there's a better way to figure out what the limit is. For now, only
+    // check during debug build so that we can detect when exceeding physical
+    // limits could be a potential cause for model outputs to be wrong. In the
+    // meantime, the threshold for using texture storage can be configured at
+    // export time.
+#ifdef VULKAN_DEBUG
+  uint32_t max_extent = storage_type == utils::kTexture3D
+      ? adapter_ptr->max_texture3d_dim()
+      : adapter_ptr->max_texture2d_dim();
+
+  VK_CHECK_COND(
+      image_extents[0] <= max_extent && image_extents[1] <= max_extent &&
+      image_extents[2] <= max_extent);
+#endif
+
   VkSampler sampler = adapter_ptr->sampler_cache().retrieve(sampler_props);
 
   return adapter_ptr->vma().create_image(
@@ -291,6 +311,8 @@ vkapi::VulkanBuffer allocate_buffer(
       return vkapi::VulkanBuffer();
   }
 
+  VK_CHECK_COND(numel <= context_ptr->adapter_ptr()->max_buffer_numel());
+
   return adapter_ptr->vma().create_storage_buffer(
       element_size(dtype) * numel, allocate_memory);
 }
 
@@ -125,6 +125,8 @@ def buffer_gvec_type(dtype: str, n: int) -> str:
 
     if dtype == "float":
         return f"vec{n}"
+    if dtype == "uint":
+        return f"uvec{n}"
     elif dtype == "half":
         return f"f16vec{n}"
     elif dtype == "int":