Commit 053b6ce

Update
[ghstack-poisoned]
2 parents: 0dce3a8 + b5c0c61

33 files changed: 448 additions, 352 deletions
.ci/scripts/unittest-linux.sh

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ else
 fi
 
 # The generic Linux job chooses to use base env, not the one setup by the image
+eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"

.ci/scripts/unittest-macos.sh

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ else
 fi
 
 bash .ci/scripts/setup-conda.sh
+eval "$(conda shell.bash hook)"
 
 # Create temp directory for sccache shims
 export TMP_DIR=$(mktemp -d)
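
Note: "conda shell.bash hook" prints the shell functions that define conda activate, so eval-ing it lets the activation step below work in these non-interactive CI shells, where conda's usual ~/.bashrc initialization is never sourced. Both CI scripts get the same one-line fix.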

backends/arm/arm_vela.py

Lines changed: 8 additions & 1 deletion
@@ -39,7 +39,12 @@ def vela_bin_pack_io(prefix, data, shape_order=None):
 # Output via Vela to binary stream for ArmBackendEthosU
 # WARNING: Do not change this without changing VelaBinStream.cpp as that
 # function consumes this format and the two need to align.
-def vela_compile(tosa_flatbuffer: bytes, args: List[str], shape_order=None):
+def vela_compile(
+    tosa_flatbuffer: bytes, args: List[str], shape_order=None, verbose: bool = False
+):
+    """
+    Compile a TOSA graph to a binary stream for ArmBackendEthosU using Vela.
+    """
     with tempfile.TemporaryDirectory() as tmpdir:
         tosaname = "out.tosa"
         tosa_path = os.path.join(tmpdir, tosaname)
@@ -50,6 +55,8 @@ def vela_compile(tosa_flatbuffer: bytes, args: List[str], shape_order=None):
         output_dir = os.path.join(tmpdir, "output")
         args.append(f"--output-dir={output_dir}")
         args.append(tosa_path)
+        if verbose:
+            args.append("--verbose-all")
         vela.main(" ".join(args).split(" "))
 
         if any("ethos-u85" in arg for arg in args) or any(

backends/arm/ethosu_backend.py

Lines changed: 6 additions & 1 deletion
@@ -58,7 +58,12 @@ def _compile_tosa_flatbuffer(
         )
 
         # Pass on the TOSA flatbuffer to the vela compiler.
-        binary = vela_compile(tosa_flatbuffer, compile_flags, input_order)
+        binary = vela_compile(
+            tosa_flatbuffer,
+            compile_flags,
+            input_order,
+            verbose=logger.getEffectiveLevel() == logging.INFO,
+        )
         return binary
 
     @staticmethod
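
Note: a minimal sketch of how a caller would turn on the verbose Vela output wired up above, assuming the backend keeps a module-level logger named after its module path (the exact logger name here is an assumption):

import logging

# Raising this logger to INFO makes _compile_tosa_flatbuffer pass verbose=True
# to vela_compile, which in turn appends --verbose-all to the Vela invocation.
logging.getLogger("executorch.backends.arm.ethosu_backend").setLevel(logging.INFO)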

backends/arm/operators/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ python_library(
         "//executorch/backends/arm:tosa_mapping",
         "//executorch/backends/arm:tosa_quant_utils",
         "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/arm/_passes:passes",
         "//executorch/exir:lib",
     ],
 )

extension/llm/custom_ops/op_sdpa.cpp

Lines changed: 20 additions & 22 deletions
@@ -594,46 +594,46 @@ bool validate_flash_attention_args(
     const Tensor& key,
     const Tensor& value,
     const optional<Tensor>& attn_mask) {
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(query.dim() == 4, "query must be a 4D tensor");
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(key.dim() == 4, "key must be a 4D tensor");
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(value.dim() == 4, "value must be a 4D tensor");
+  ET_CHECK_OR_RETURN_FALSE(query.dim() == 4, "query must be a 4D tensor");
+  ET_CHECK_OR_RETURN_FALSE(key.dim() == 4, "key must be a 4D tensor");
+  ET_CHECK_OR_RETURN_FALSE(value.dim() == 4, "value must be a 4D tensor");
 
   // Sizes
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
       (query.size(3) == value.size(3)) && (key.size(3) == value.size(3)),
       "scaled_dot_product_attention_flash_attention: Q/K/V should have the same head size");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      (query.scalar_type() == ScalarType::Float), "Query must be Float type");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      (query.scalar_type() == key.scalar_type()) &&
          (query.scalar_type() == value.scalar_type()),
      "Key and Value must have the same data type as Query");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      !attn_mask.has_value() || attn_mask.value().dim() == 2,
      "Attention mask must be a 2D tensor");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      !attn_mask.has_value() ||
          attn_mask.value().scalar_type() == query.scalar_type(),
      "Attention mask must be a 2D tensor");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(query.dim_order().data(), query.dim()),
      "key cache must be in contiguous dim order");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(key.dim_order().data(), key.dim()),
      "value cache must be in contiguous dim order");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(value.dim_order().data(), value.dim()),
      "value cache must be in contiguous dim order");
 
   if (attn_mask.has_value()) {
-    ET_LOG_MSG_AND_RETURN_IF_FALSE(
+    ET_CHECK_OR_RETURN_FALSE(
        is_contiguous_dim_order(
            attn_mask.value().dim_order().data(), attn_mask.value().dim()),
        "value cache must be in contiguous dim order");
@@ -647,21 +647,19 @@ bool validate_cache_params(
     const Tensor& v_cache,
     int64_t start_pos,
     int64_t seq_length) {
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
-      k_cache.dim() == 4, "kcache must be a 4D tensor");
+  ET_CHECK_OR_RETURN_FALSE(k_cache.dim() == 4, "kcache must be a 4D tensor");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
-      v_cache.dim() == 4, "v_cache must be a 4D tensor");
+  ET_CHECK_OR_RETURN_FALSE(v_cache.dim() == 4, "v_cache must be a 4D tensor");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      start_pos < k_cache.size(1),
      "start_pos must be less than key cache at dim 1");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      start_pos < v_cache.size(1),
      "start_pos must be less than value cache at dim 1");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      (start_pos + seq_length) <= k_cache.size(1),
      "start_post + seq_length must be less than max seq length supported by key cache."
      "start pos: %" PRId64 ", seq_length: %" PRId64
@@ -671,7 +669,7 @@ bool validate_cache_params(
      seq_length,
      k_cache.size(1));
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      (start_pos + seq_length) <= v_cache.size(1),
      "start_post + seq_length must be less than max seq length supported by key cache."
      "start pos: %" PRId64 ", seq_length: %" PRId64
@@ -682,11 +680,11 @@ bool validate_cache_params(
      v_cache.size(1));
 
   // Make sure they are in contiguous dim order
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(k_cache.dim_order().data(), k_cache.dim()),
      "key cache must be in contiguous dim order");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(v_cache.dim_order().data(), v_cache.dim()),
      "value cache must be in contiguous dim order");

extension/llm/custom_ops/op_update_cache.cpp

Lines changed: 6 additions & 6 deletions
@@ -25,17 +25,17 @@ bool validate_cache_params(
     const Tensor& quantized_cache,
     int64_t start_pos,
     int64_t seq_length) {
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      quantized_cache.dim() == 4, "quantized cache must be a 4D tensor");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      quantized_value.dim() == 4, "quantized_value must be a 4D tensor");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      start_pos < quantized_cache.size(1),
      "start_pos must be less than cache size at dim 1");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      (start_pos + seq_length) <= quantized_cache.size(1),
      "start_post + seq_length must be less than max seq length supported by cache."
      "start pos: %" PRId64 ", seq_length: %" PRId64
@@ -46,12 +46,12 @@ bool validate_cache_params(
      quantized_cache.size(1));
 
   // Make sure they are in contiguous dim order
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(
          quantized_cache.dim_order().data(), quantized_cache.dim()),
      "quantized cache must be in contiguous dim order");
 
-  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+  ET_CHECK_OR_RETURN_FALSE(
      is_contiguous_dim_order(
          quantized_value.dim_order().data(), quantized_value.dim()),
      "quantized value must be in contiguous dim order");

extension/llm/tokenizer/targets.bzl

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ def define_common_targets():
         name = "tokenizer_py_lib",
         srcs = [
             "__init__.py",
+            "hf_tokenizer.py",
             "tokenizer.py",
             "utils.py",
         ],
