Update on "[executorch][llama] support mqa"

kimishpatel · kimishpatel · commit 26aced7565e1 · 2024-04-17T07:08:56.000-07:00
This diff adds support for multi-query attention (MQA) to SDPA with KV cache. Differential Revision: [D56228316](https://our.internmc.facebook.com/intern/diff/D56228316/) [ghstack-poisoned]
diff --git a/examples/models/llama2/custom_ops/TARGETS b/examples/models/llama2/custom_ops/TARGETS
@@ -1,14 +1,14 @@
 # Any targets that should be shared between fbcode and xplat must be defined in
 # targets.bzl. This file can contain fbcode-only targets.
 
-load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
 
 define_common_targets()
 
-python_unittest(
+runtime.python_test(
     name = "test_sdpa_with_kv_cache",
     srcs = [
         "test_sdpa_with_kv_cache.py",