Commit 8c9f7d8

Update on "[ET-VK][10/n] copy node, aten.repeat"

1. Introduce a `CopyNode` for generic copy-with-offset operations.
2. Support `aten.repeat` on all dimensions.
   2.1. Use `CopyNode` where possible.
   2.2. Add a specialized `repeat_channel` shader to handle channel packing.
3. Update codegen to support operations that have only the `Methods` variant; these need a new route to trigger the dispatch.

Differential Revision: [D56499329](https://our.internmc.facebook.com/intern/diff/D56499329/)

[ghstack-poisoned]
2 parents 2130e97 + 21ebcf7 · commit 8c9f7d8
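
For reference, `aten.repeat` tiles its input along every dimension: with input sizes (n, c, h, w) and repeats (n_r, c_r, h_r, w_r), the output has sizes (n * n_r, c * c_r, h * h_r, w * w_r). A minimal PyTorch sketch of these semantics (illustrative only, not part of the commit):

import torch

x = torch.arange(6).reshape(1, 2, 3)  # sizes (1, 2, 3)
y = x.repeat(2, 3, 1)                 # repeat dim 0 twice, dim 1 three times
print(y.shape)                        # torch.Size([2, 6, 3])
assert torch.equal(y[0, 0:2, :], x[0])  # each tile is a copy of the input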

File tree

14 files changed, +171 -144 lines

.github/workflows/android.yml

Lines changed: 2 additions & 13 deletions
@@ -48,23 +48,12 @@ jobs:
       # Build Android demo app
       bash build/test_android_ci.sh
 
-      # Strip libraries for uploda
-      strip cmake-out-android-arm64-v8a/lib/*.a cmake-out-android-arm64-v8a/extension/android/*.so
-      strip cmake-out-android-x86_64/lib/*.a cmake-out-android-x86_64/extension/android/*.so
-
       mkdir -p artifacts-to-be-uploaded
-      mkdir -p artifacts-to-be-uploaded/arm64-v8a/
-      mkdir -p artifacts-to-be-uploaded/x86_64/
-      # Copy the jar to S3
-      cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/
       # Copy the app and its test suite to S3
       cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/
       cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/
-      # Also copy the libraries
-      cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/arm64-v8a/
-      cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/arm64-v8a/
-      cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/x86_64/
-      cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/x86_64/
+      # Also copy the share libraries
+      cp cmake-out-android/lib/*.a artifacts-to-be-uploaded/
 
   # Upload the app and its test suite to S3 so that they can be downloaded by the test job
   upload-artifacts:

backends/vulkan/runtime/api/Tensor.h

Lines changed: 2 additions & 2 deletions
@@ -220,8 +220,8 @@ class vTensor final {
   */
  const api::BufferBindInfo texture_limits_ubo();
 
-  inline const vTensor::TextureLimits texture_limits() const {
-    return texture_limits_;
+  inline const api::utils::ivec3 texture_limits() const {
+    return texture_limits_.limits;
   }
 
  inline size_t numel() const {

backends/vulkan/runtime/graph/ops/glsl/repeat_channel.glsl

Lines changed: 1 addition & 4 deletions
@@ -20,14 +20,12 @@ layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict
 layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
 
 layout(set = 0, binding = 2) uniform PRECISION restrict RepeatArgs {
-  // With input of size (n, c_i, h, w) and repeat r
+  // With input_size (n, c_i, h, w) and repeat r
   // out_size == (n, c_i * r, h, w)
   ivec4 out_sizes;
   ivec4 in_sizes;
 };
 
-
-
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
 layout(constant_id = 3) const int packed_dim = C_DIM;

@@ -58,4 +56,3 @@ void main() {
 
   imageStore(image_out, out_pos, v);
 }
-
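
The uniform block above documents the shader contract: with input_size (n, c_i, h, w) and repeat r, out_size == (n, c_i * r, h, w). The reason a dedicated shader exists at all is channel packing: four consecutive channels share one texel, and output channel c_out reads input channel c_out % c_i, which is in general misaligned with texel boundaries. A NumPy sketch of the reference semantics (hypothetical helper name, not code from this commit):

import numpy as np

def repeat_channel_ref(x: np.ndarray, r: int) -> np.ndarray:
    # With input_size (n, c_i, h, w) and repeat r, out_size == (n, c_i * r, h, w).
    n, c_i, h, w = x.shape
    out = np.empty((n, c_i * r, h, w), dtype=x.dtype)
    for c_out in range(c_i * r):
        # Each output channel copies input channel c_out % c_i; when c_i is not
        # a multiple of 4, this modulo crosses packed-texel boundaries on the GPU,
        # which is what the shader has to reason about texel by texel.
        out[:, c_out] = x[:, c_out % c_i]
    return out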

backends/vulkan/runtime/graph/ops/glsl/repeat_channel.yaml

Lines changed: 0 additions & 1 deletion
@@ -8,4 +8,3 @@ repeat_channel:
       - VALUE: float
   shader_variants:
     - NAME: repeat_channel
-

backends/vulkan/runtime/graph/ops/impl/Copy.h

Lines changed: 0 additions & 7 deletions
@@ -11,13 +11,6 @@
 #include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
 
 #include <executorch/backends/vulkan/runtime/api/api.h>
-#include <executorch/backends/vulkan/runtime/graph/Logging.h>
-
-#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
-#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
-#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
-
-#include <iostream>
 
 namespace vkcompute {
 

backends/vulkan/runtime/graph/ops/impl/Repeat.cpp

Lines changed: 7 additions & 12 deletions
@@ -8,18 +8,13 @@
 
 #include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
 
-#include <executorch/backends/vulkan/runtime/api/api.h>
-#include <executorch/backends/vulkan/runtime/graph/Logging.h>
-
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
 
 #include <executorch/backends/vulkan/runtime/graph/ops/impl/Copy.h>
 
-#include <iostream>
-
 namespace vkcompute {
 
 namespace {

@@ -137,12 +132,12 @@ void add_repeat_node(
   // After expanding a dimension, we will update the "running_range" since we
   // will need to copy the "expanded" area.
 
-  api::utils::ivec3 running_range = t_in->texture_limits().limits;
+  api::utils::ivec3 running_range = t_in->texture_limits();
 
   const std::vector<int64_t>& in_sizes = t_in->sizes();
 
-  // We use channel packing, repeating the channel dimension is the most
-  // complicated and time-consuming, since we need to reason over misaligned
+  // Since we use channel packing, repeating the channel dimension is the most
+  // complicated and time-consuming, as we need to reason over misaligned
   // channels. Hence we expand it first to minimize cost. Also, in this first
   // dimension, we copy over the input texure to the output. In subsequent
   // dimensions, we read and write from the same tensor.

@@ -159,12 +154,12 @@ void add_repeat_node(
     add_repeat_channel_node(graph, in, channel_repeat, out, running_range);
   }
 
+  // TODO: refactor width, height, and batch into a common helper function.
   // Width
   if (int64_t width_repeat = dim_at<Dim4D::Width>(repeats); width_repeat > 1) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
-    // api::utils::ivec3 range = t_in->texture_limits().limits;
 
-    for (int i = 1; i < width_repeat; i++) {
+    for (int i = 1; i < width_repeat; ++i) {
       api::utils::ivec3 dst_offset = api::utils::make_ivec3(
           {i * dim_at<Dim4D::Width>(in_sizes), 0, 0}, false);

@@ -180,7 +175,7 @@ void add_repeat_node(
       height_repeat > 1) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
 
-    for (int i = 1; i < height_repeat; i++) {
+    for (int i = 1; i < height_repeat; ++i) {
       api::utils::ivec3 dst_offset = api::utils::make_ivec3(
           {0, i * dim_at<Dim4D::Height>(in_sizes), 0}, false);

@@ -195,7 +190,7 @@ void add_repeat_node(
   if (int64_t batch_repeat = dim_at<Dim4D::Batch>(repeats); batch_repeat > 1) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
 
-    for (int i = 1; i < batch_repeat; i++) {
+    for (int i = 1; i < batch_repeat; ++i) {
       api::utils::ivec3 dst_offset =
           api::utils::make_ivec3({0, 0, i * running_range.data[2]}, false);
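
The control flow of `add_repeat_node` above, restated as a NumPy reference (an illustrative sketch following the commit's description, not the actual implementation): expand the packing-sensitive channel dimension first with the specialized shader, then tile width, height, and batch with plain offset copies, growing the running range after each step.

import numpy as np

def repeat_ref(x: np.ndarray, repeats: tuple) -> np.ndarray:
    # x has sizes (n, c, h, w); repeats holds (batch, channel, height, width) factors.
    n_r, c_r, h_r, w_r = repeats
    out = np.tile(x, (1, c_r, 1, 1))    # channel first: specialized repeat_channel shader
    out = np.tile(out, (1, 1, 1, w_r))  # width:  CopyNode copies at dst offset i * in_width
    out = np.tile(out, (1, 1, h_r, 1))  # height: CopyNode copies at dst offset i * in_height
    out = np.tile(out, (n_r, 1, 1, 1))  # batch:  CopyNode copies at dst offset i * running_range.z
    return out

Because tiling the four axes commutes, the order only matters for cost on the GPU; doing channels first keeps the expensive misaligned copies as small as possible.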
201196

backends/vulkan/test/op_tests/utils/codegen.py

Lines changed: 1 addition & 5 deletions
@@ -33,11 +33,7 @@
 
 from torchgen.gen import generate_static_dispatch_backend_call, translate_args
 
-from torchgen.gen_aoti_c_shim import (
-    gen_aoti_c_shim,
-    gen_static_dispatch_backend_call_signature,
-    get_backend_index_for_aoti,
-)
+from torchgen.gen_aoti_c_shim import gen_static_dispatch_backend_call_signature
 from torchgen.model import NativeFunction, Variant
 
 ##################################
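
Context for the codegen change above: `aten::repeat` is a method-variant-only operator (`variants: method` in `native_functions.yaml`), so the generated test harness cannot call a free function such as `at::repeat` and must dispatch through the `Tensor` method instead, which is the new route the commit message mentions. The same distinction is visible in eager PyTorch:

import torch

x = torch.ones(1, 2, 3)
y = x.repeat(2, 3, 1)          # OK: repeat exists as a Tensor method
# torch.repeat(x, (2, 3, 1))   # AttributeError: no function variant is generated
print(y.shape)                 # torch.Size([2, 6, 3])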

build/test_android_ci.sh

Lines changed: 6 additions & 9 deletions
@@ -8,20 +8,18 @@
 set -ex
 
 # https://github.com/pytorch/executorch/tree/main/examples/demo-apps/android/ExecuTorchDemo
-export_model() {
+build_executorch() {
   MODEL_NAME=dl3
   # Delegating DeepLab v3 to XNNPACK backend
   python -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate
 
   ASSETS_DIR=examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
   mkdir -p "${ASSETS_DIR}"
   cp "${MODEL_NAME}_xnnpack_fp32.pte" "${ASSETS_DIR}"
-}
 
-build_android_native_library() {
-  pushd examples/demo-apps/android/LlamaDemo
-  CMAKE_OUT="cmake-out-android-$1" ANDROID_NDK=/opt/ndk ANDROID_ABI="$1" ./gradlew setup
-  popd
+  rm -rf cmake-out && mkdir cmake-out
+  ANDROID_NDK=/opt/ndk BUCK2=$(which buck2) FLATC=$(which flatc) ANDROID_ABI=arm64-v8a \
+    bash examples/demo-apps/android/ExecuTorchDemo/setup.sh
 }
 
 build_android_demo_app() {

@@ -32,13 +30,12 @@ build_android_demo_app() {
 
 build_android_llama_demo_app() {
   pushd examples/demo-apps/android/LlamaDemo
+  ANDROID_NDK=/opt/ndk ANDROID_ABI=arm64-v8a ./gradlew setup
   ANDROID_HOME=/opt/android/sdk ./gradlew build
   ANDROID_HOME=/opt/android/sdk ./gradlew assembleAndroidTest
   popd
 }
 
-build_android_native_library arm64-v8a
-build_android_native_library x86_64
-export_model
+build_executorch
 build_android_demo_app
 build_android_llama_demo_app
