Add index check for embedding kernel (#11375)

larryliu0820 · facebook-github-bot · commit 8dda88e9550b · 2025-06-04T16:09:28.000-07:00
Summary: index should always be smaller than weight.size(0). Adding this check in `op_embedding`. This is to avoid wild-addr-read error: ``` AddressSanitizer:DEADLYSIGNAL ================================================================= ==3544359==ERROR: AddressSanitizer: SEGV on unknown address 0x7fce2364bc00 (pc 0x000002d225a0 bp 0x7ffffc792a40 sp 0x7ffffc792990 T0) ==3544359==The signal is caused by a READ memory access. SCARINESS: 20 (wild-addr-read) #0 0x2d225a0 in void torch::executor::native::(anonymous namespace)::embedding_byte_per_channel<signed char, c10::Half, float>(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:175 #1 0x2d22367 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #2 0x2d2223d in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #3 0x2d21d37 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #4 0x2d21bca in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #5 0x2d20f8f in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #6 0x2d20e13 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&)::$_0::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #7 0x2d20d06 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303 #8 0x2d226b7 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::KernelRuntimeContext&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::ScalarType>, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:329 #9 0x2d09bef in torch::executor::function::(anonymous namespace)::$_7::operator()(executorch::runtime::KernelRuntimeContext&, executorch::runtime::EValue**) const buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/executorch/kernels/quantized/__generated_lib_combined__/out/RegisterCodegenUnboxedKernelsEverything.cpp:322 #10 0x2d09a70 in torch::executor::function::(anonymous namespace)::$_7::__invoke(executorch::runtime::KernelRuntimeContext&, executorch::runtime::EValue**) buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/executorch/kernels/quantized/__generated_lib_combined__/out/RegisterCodegenUnboxedKernelsEverything.cpp:297 #11 0x27d769b in executorch::runtime::Method::execute_instruction() xplat/executorch/runtime/executor/method.cpp:1306 #12 0x27d8c55 in executorch::runtime::Method::execute() xplat/executorch/runtime/executor/method.cpp:1550 #13 0x27b1e25 in executorch::extension::Module::execute(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, std::vector<executorch::runtime::EValue, std::allocator<executorch::runtime::EValue>> const&) xplat/executorch/extension/module/module.cpp:261 #14 0x27afe43 in executorch::extension::Module::forward(std::vector<executorch::runtime::EValue, std::allocator<executorch::runtime::EValue>> const&) xplat/executorch/extension/module/module.h:340 #15 0x27e0519 in executorch::extension::llm::LlmBackboneRunner::run(std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&) xplat/executorch/examples/models/fb/llama4/runner/llm_backbone_runner.cpp:58 #16 0x27a35c9 in executorch::extension::llm::Llama4Runner::prefill_tokens(std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&, std::shared_ptr<executorch::runtime::etensor::Tensor> const&) xplat/executorch/examples/models/fb/llama4/runner/llama4_runner.cpp:133 #17 0x885774 in main (/data/users/larryliu/fbsource/buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/cria/benchmark/llama4/__generation_main__/generation_main+0x885774) #18 0x7fce2122c656 in __libc_start_call_main /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../sysdeps/nptl/libc_start_call_main.h:58:16 #19 0x7fce2122c717 in __libc_start_main@GLIBC_2.2.5 /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../csu/libc-start.c:409:3 #20 0x884c20 in _start /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../sysdeps/x86_64/start.S:116 AddressSanitizer can not provide additional info. AddressSanitizer: SEGV xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:175 in void torch::executor::native::(anonymous namespace)::embedding_byte_per_channel<signed char, c10::Half, float>(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional<executorch::runtime::etensor::Tensor> const&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor&) ==3544359==ABORTING ``` Test Plan: Imported from GitHub, without a `Test Plan:` line. Rollback Plan: Reviewed By: Gasoonjia Differential Revision: D75982682 Pulled By: larryliu0820
diff --git a/kernels/quantized/cpu/op_embedding.cpp b/kernels/quantized/cpu/op_embedding.cpp
@@ -153,6 +153,22 @@ void embedding_byte_per_channel(
 
   for (int i = 0; i < indices.numel(); i++) {
     int64_t index = indices_ptr[i];
+
+    // Check if index is out of bounds for both weight and weight_scales
+    ET_CHECK_MSG(
+        index >= 0 && index < weight.size(0),
+        "Index out of bounds for weight: index %" PRId64
+        " must be in range [0, %zd)",
+        index,
+        weight.size(0));
+
+    ET_CHECK_MSG(
+        index >= 0 && index < weight_scales.size(0),
+        "Index out of bounds for weight_scales: index %" PRId64
+        " must be in range [0, %zd)",
+        index,
+        weight_scales.size(0));
+
     // If using groupwise embedding
     int32_t qparams_index = index * num_groups_per_channel;
     CTYPE_PARAMS zp = 0.0;
diff --git a/kernels/quantized/test/op_embedding_test.cpp b/kernels/quantized/test/op_embedding_test.cpp
@@ -373,3 +373,38 @@ TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath5) {
           out),
       "");
 }
+
+TEST(OpQuantizedEmbeddingTest, TestOutOfBoundsIndex) {
+  et_pal_init();
+  TensorFactory<ScalarType::Float> tf;
+  TensorFactory<ScalarType::Long> tf_l;
+
+  int64_t quant_min = 0;
+  int64_t quant_max = 255;
+
+  // Create a weight tensor with 3 rows
+  TensorFactory<ScalarType::Byte> tfo;
+  Tensor qweight =
+      tfo.make({3, 4}, {8, 10, 12, 14, 10, 12, 12, 14, 8, 9, 10, 12});
+
+  // Create weight_scales with the same number of rows
+  Tensor weight_scales = tf.make({3, 1}, {0.5, 1.0, 1.5});
+  Tensor weight_zero_points = tf.make({3, 1}, {1, 5, 7});
+
+  // Create indices with an out-of-bounds index (3, which is >= weight.size(0))
+  Tensor indices = tf_l.make({2}, {1, 3});
+
+  Tensor out = tf.zeros({2, 4});
+
+  // Expect death when accessing an out-of-bounds index
+  ET_EXPECT_DEATH(
+      quantized_embedding_byte_out(
+          qweight,
+          weight_scales,
+          weight_zero_points,
+          quant_min,
+          quant_max,
+          indices,
+          out),
+      "Index out of bounds for weight");
+}