
Commit fbd1a7f

Merge branch 'main' into export-D69871232
2 parents: 06bdbc0 + 3e188fe

27 files changed: +330 / -69 lines


CMakeLists.txt

Lines changed: 5 additions & 0 deletions

@@ -258,6 +258,11 @@ if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
 endif()

+if(EXECUTORCH_BUILD_EXTENSION_MODULE)
+  set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
+  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+endif()
+
 if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
   set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
   set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)

CODEOWNERS

Lines changed: 85 additions & 0 deletions

@@ -0,0 +1,85 @@
+# IMPORTANT:
+# This file is ONLY used to subscribe for notifications for PRs
+# related to a specific file path. Approvals from people in this
+# file are not required for merges.
+
+/backends/apple @shoumikhin @cccclai
+/backends/apple/mps @cccclai @DenisVieriu97
+/backends/arm @digantdesai
+/backends/cadence @tarun292
+/backends/example @iseeyuan @JacobSzwejbka @larryliu0820
+/backends/mediatek @cccclai @neuropilot-captain
+/backends/qualcomm @cccclai @chunit-quic @haowhsu-quic @shewu-quic @winskuo-quic
+/backends/test @cccclai
+/backends/transforms @kimishpatel
+/backends/vulkan @SS-JIA
+/backends/xnnpack @digantdesai @mcr229
+
+/build @GregoryComer @dbort @kirklandsign
+
+/codegen @larryliu0820 @lucylq
+
+/devtools @tarun292 @Gasoonjia
+
+/docs @mergennachin
+
+/examples/apple @shoumikhin
+/examples/apple/coreml @cccclai @metascroy @cymbalrush @YifanShenSZ
+/examples/arm @digantdesai
+/examples/cadence @tarun292
+/examples/demo-apps @shoumikhin @kirklandsign
+/examples/devtools @tarun292
+/examples/llm_manual @larryliu0820
+/examples/llm_pte_finetuning @JacobSzwejbka
+/examples/mediatek @cccclai
+/examples/models @lucylq
+/examples/portable @larryliu0820 @manuelcandales
+/examples/qualcomm @cccclai
+/examples/selective_build @lucylq @larryliu0820 @JacobSzwejbka
+/examples/xnnpack @digantdesai @mcr229
+
+/exir/backend @cccclai @kimishpatel @JacobSzwejbka @tarun292
+/exir @JacobSzwejbka @tarun292 @larryliu0820
+
+
+/extension/android @kirklandsign
+/extension/android_test @kirklandsign
+/extension/apple @shoumikhin
+/extension/aten_util @JacobSzwejbka
+/extension/benchmark @tarun292
+/extension/data_loader @JacobSzwejbka @lucylq @dbort
+/extension/evalue_util @GregoryComer @dbort
+/extension/export_util @kimishpatel
+/extension/flat_tensor @lucylq
+/extension/gguf_util @larryliu0820
+/extension/kernel_util @kimishpatel @manuelcandales
+/extension/llm @jackzhxng @iseeyuan @larryliu0820
+/extension/memory_allocator @JacobSzwejbka @dbort
+/extension/module @shoumikhin
+/extension/parallel @kimishpatel
+/extension/pybindings @JacobSzwejbka @larryliu0820
+/extension/pytree @JacobSzwejbka
+/extension/runner_util @dbort
+/extension/tensor @shoumikhin
+/extension/testing_util @dbort
+/extension/threadpool @kimishpatel
+/extension/training @JacobSzwejbka
+
+/kernels @manuelcandales
+
+/profiler @tarun292 @Gasoonjia
+
+/runtime @dbort @JacobSzwejbka @lucylq
+/runtime/backend @cccclai
+
+/schema @dbort @JacobSzwejbka @lucylq
+
+/scripts @GregoryComer
+
+/shim @larryliu0820 @GregoryComer
+
+/third-party @GregoryComer
+
+/test @larryliu0820 @kirklandsign
+
+/util @tarun292

backends/arm/operators/TARGETS

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ python_library(

 python_library(
     name = "ops",
-    srcs = glob(["op_*.py"]),
+    srcs = glob(["op_*.py", "ops_*.py"]),
     typing = True,
     deps = [
         "fbsource//third-party/serialization_lib/python/tosa:tosa",

backends/cadence/CMakeLists.txt

Lines changed: 8 additions & 3 deletions

@@ -22,7 +22,10 @@ endif()
 include(${EXECUTORCH_ROOT}/build/Utils.cmake)

 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+
+add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)

 if(EXECUTORCH_CADENCE_CPU_RUNNER)
   include(${EXECUTORCH_ROOT}/build/Codegen.cmake)

@@ -74,10 +77,12 @@ endif()

 if(EXECUTORCH_NNLIB_OPT)
   set(TARGET_DIR hifi)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
+                   ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 elseif(EXECUTORCH_FUSION_G3_OPT)
   set(TARGET_DIR fusion_g3)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
+                   ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 else()
   set(TARGET_DIR reference)
 endif()

backends/cadence/aot/functions_hifi.yaml

Lines changed: 0 additions & 5 deletions

@@ -219,11 +219,6 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::quantized_relu_per_tensor_out

-- func: cadence::quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
-  kernels:
-    - arg_meta: null
-      kernel_name: cadence::impl::HiFi::quantized_linear_per_tensor_out
-
 - func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

backends/cadence/fusion_g3/operators/CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -64,7 +64,8 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE xa_nnlib)

 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)

 target_include_directories(
   aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}

backends/cadence/hifi/kernels/CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -25,7 +25,8 @@ add_library(
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_transpose_32.c
 )
 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)

 target_include_directories(
   cadence_kernels

backends/cadence/hifi/operators/CMakeLists.txt

Lines changed: 3 additions & 2 deletions

@@ -67,7 +67,8 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)

 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)

 target_include_directories(
   aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}

@@ -77,7 +78,7 @@ target_include_directories(
 # Custom ops that are needed to run the test model.
 add_library(
   custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp"
-  "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp"
+  "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp" "op_quantized_fully_connected_out"
 )
 target_include_directories(
   custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}

backends/cadence/hifi/operators/op_clamp.cpp

Lines changed: 1 addition & 1 deletion

@@ -328,7 +328,7 @@ Tensor& clamp_tensor_out(
     const executorch::aten::optional<Tensor>& min_opt,
     const executorch::aten::optional<Tensor>& max_opt,
     Tensor& out) {
-  clamp_Tensor_out(ctx, in, min_opt, max_opt, out);
+  return clamp_Tensor_out(ctx, in, min_opt, max_opt, out);
 }

 } // namespace native

backends/cadence/hifi/operators/op_mean.cpp

Lines changed: 1 addition & 1 deletion

@@ -175,7 +175,7 @@ Tensor& mean_dim_out(
     bool keepdim,
     optional<ScalarType> dtype,
     Tensor& out) {
-  mean_out(ctx, in, dim_list, keepdim, dtype, out);
+  return mean_out(ctx, in, dim_list, keepdim, dtype, out);
 }

 } // namespace native

backends/cadence/hifi/operators/op_quantized_relu_out.cpp

Lines changed: 3 additions & 3 deletions

@@ -100,10 +100,10 @@ void quantized_relu_per_tensor_out(
 void quantized_relu_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
-    const int64_t in_zero_point,
+    const Tensor& in_zero_point,
     const int64_t out_zero_point,
-    const int64_t out_multiplier,
-    const int64_t out_shift,
+    const Tensor& out_multiplier,
+    const Tensor& out_shift,
     Tensor& output) {
   quantized_relu_per_tensor_out(
       ctx,

backends/cadence/hifi/operators/op_softmax.cpp

Lines changed: 1 addition & 1 deletion

@@ -200,7 +200,7 @@ Tensor& softmax_out(
     int64_t dim,
     bool half_to_float,
     Tensor& out) {
-  _softmax_out(ctx, in, dim, half_to_float, out);
+  return _softmax_out(ctx, in, dim, half_to_float, out);
 }

 } // namespace native

backends/cadence/hifi/operators/op_where.cpp

Lines changed: 9 additions & 0 deletions

@@ -183,6 +183,15 @@ Tensor& where_self_out(
   return out;
 }

+Tensor& where_out(
+    RuntimeContext& ctx,
+    const Tensor& cond,
+    const Tensor& a,
+    const Tensor& b,
+    Tensor& out) {
+  return where_out(ctx, cond, a, b, out);
+}
+
 } // namespace native
 } // namespace HiFi
 } // namespace impl

backends/cadence/reference/kernels/CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -8,7 +8,8 @@
 add_library(cadence_kernels kernels.cpp)

 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)

 target_include_directories(cadence_kernels PUBLIC .
                            ${_common_include_directories}

backends/cadence/reference/operators/CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -71,7 +71,8 @@ target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)

 # Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(_common_include_directories ${EXECUTORCH_ROOT}/..
+    ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)

 target_include_directories(
   aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}

backends/transforms/view_copy_to_squeeze_unsqueeze.py

Lines changed: 5 additions & 1 deletion

@@ -75,7 +75,11 @@ def find_unsqueeze_dim(
        j = 0
        idx = -1
        while j < len(view_shape):
-            if input_shape[i] != view_shape[j]:
+            # account for added dim being last dim in view_shape
+            if i == j and j == len(input_shape):
+                if view_shape[j] != 1:
+                    return None
+            elif input_shape[i] != view_shape[j]:
                if view_shape[j] == 1:
                    idx = j
                    i -= 1
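For reference, a minimal standalone sketch of the dimension search this pass performs, under the assumption that a view_copy qualifies as an unsqueeze when view_shape is input_shape with exactly one size-1 dim inserted. The function name and structure below are illustrative, not the pass's actual helper; the branch added above corresponds to the trailing-dim case, where the inserted size-1 dim is the last dim of view_shape.

from typing import List, Optional

def find_unsqueeze_dim_sketch(
    input_shape: List[int], view_shape: List[int]
) -> Optional[int]:
    # Illustrative re-implementation: walk both shapes in lockstep and note
    # the single position where view_shape has an extra size-1 dim.
    if len(view_shape) != len(input_shape) + 1:
        return None
    i = 0  # index into input_shape
    j = 0  # index into view_shape
    idx = -1  # position of the inserted size-1 dim
    while j < len(view_shape):
        if i == j and j == len(input_shape):
            # Added dim is the last dim of view_shape; input_shape[i] does
            # not exist here, so the trailing dim must be 1 to qualify.
            if view_shape[j] != 1:
                return None
            idx = j
        elif input_shape[i] != view_shape[j]:
            if view_shape[j] != 1:
                return None
            idx = j
            i -= 1  # re-compare the same input dim against the next view dim
        i += 1
        j += 1
    return idx

# (2, 3) -> (2, 3, 1) is an unsqueeze at dim 2; (2, 3) -> (2, 1, 3) at dim 1.
assert find_unsqueeze_dim_sketch([2, 3], [2, 3, 1]) == 2
assert find_unsqueeze_dim_sketch([2, 3], [2, 1, 3]) == 1
assert find_unsqueeze_dim_sketch([2, 3], [2, 3, 4]) is None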

examples/models/llama/static_attention.py

Lines changed: 27 additions & 11 deletions

@@ -114,15 +114,30 @@ def update(
         return all_data, (out_k_cache, out_v_cache)


-def _apply_rotary_embedding(
-    x: torch.Tensor, freqs_cos: torch.Tensor, freqs_sin: torch.Tensor
-) -> torch.Tensor:
-    x_r, x_i = x[..., ::2], x[..., 1::2]
-    x_out_r = x_r * freqs_cos - x_i * freqs_sin
-    x_out_i = x_r * freqs_sin + x_i * freqs_cos
+class _Rope(nn.Module):
+    def __init__(self, use_hf_rope):
+        super().__init__()
+        self.use_hf_rope = use_hf_rope
+
+    def forward(
+        self, x: torch.Tensor, freqs_cos: torch.Tensor, freqs_sin: torch.Tensor
+    ) -> torch.Tensor:
+        if self.use_hf_rope:
+            if len(freqs_cos.shape) == 2:
+                freqs_cos = freqs_cos.unsqueeze(0)
+            if len(freqs_sin.shape) == 2:
+                freqs_sin = freqs_sin.unsqueeze(0)
+            x1 = x[..., : x.shape[-1] // 2]
+            x2 = x[..., x.shape[-1] // 2 :]
+            x_rotated = torch.cat((-x2, x1), dim=-1)
+            return x * freqs_cos + x_rotated * freqs_sin
+        else:
+            x_r, x_i = x[..., ::2], x[..., 1::2]
+            x_out_r = x_r * freqs_cos - x_i * freqs_sin
+            x_out_i = x_r * freqs_sin + x_i * freqs_cos

-    x_out = torch.cat([x_out_r, x_out_i], dim=-1)
-    return x_out
+            x_out = torch.cat([x_out_r, x_out_i], dim=-1)
+            return x_out


 @register_attention("static")

@@ -172,6 +187,7 @@ def __init__(self, config: ModelArgs, layer_id: int, rope: Rope):
             [StaticVCache(layer_id, i) for i in range(self.n_kv_heads)]
         )
         self.wo = nn.Linear(self.n_heads * self.head_dim, self.dim, bias=False)
+        self.rope = _Rope(rope.params.use_hf_rope)

     def forward(
         self,

@@ -191,8 +207,8 @@ def forward(
         new_qs = [self.wqs[i](x) for i in range(self.n_heads)]
         new_ks = [self.wks[i](x) for i in range(self.n_kv_heads)]
         new_vs = [self.wvs[i](x) for i in range(self.n_kv_heads)]
-        new_qs = [_apply_rotary_embedding(q, freqs_cos, freqs_sin) for q in new_qs]
-        new_ks = [_apply_rotary_embedding(k, freqs_cos, freqs_sin) for k in new_ks]
+        new_qs = [self.rope(q, freqs_cos, freqs_sin) for q in new_qs]
+        new_ks = [self.rope(k, freqs_cos, freqs_sin) for k in new_ks]

         all_ks = []
         all_vs = []

@@ -211,7 +227,7 @@ def forward(
             kv_idx = i // self.n_heads_per_kv_group
             attn = new_qs[i] @ all_ks[kv_idx].transpose(-2, -1)
             attn = attn * self.inv_scale
-            attn = attn + mask  # pyre-ignore
+            attn = attn + mask
             attn = F.softmax(attn, dim=-1)
             heads.append(attn @ all_vs[kv_idx])
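The HF-style branch of the new _Rope module uses the "rotate half" formulation. A small hedged sketch in plain torch (the shapes, angle construction, and names below are invented for illustration and are not taken from Rope.get_freqs): when freqs_cos and freqs_sin repeat cos(theta) and sin(theta) across the two halves of head_dim, the branch reduces to an elementwise 2-D rotation of each (x[..., i], x[..., i + head_dim // 2]) pair.

import torch

def hf_rope(x, freqs_cos, freqs_sin):
    # Mirrors the use_hf_rope branch of _Rope.forward in this diff.
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    x_rotated = torch.cat((-x2, x1), dim=-1)
    return x * freqs_cos + x_rotated * freqs_sin

seq_len, head_dim = 8, 16
theta = torch.rand(seq_len, head_dim // 2)  # hypothetical per-position angles
freqs_cos = torch.cat([theta.cos(), theta.cos()], dim=-1)
freqs_sin = torch.cat([theta.sin(), theta.sin()], dim=-1)

x = torch.rand(seq_len, head_dim)
out = hf_rope(x, freqs_cos, freqs_sin)

# Reference: rotate each (x1_i, x2_i) pair by theta_i with a 2x2 rotation matrix.
x1, x2 = x[..., : head_dim // 2], x[..., head_dim // 2 :]
ref = torch.cat(
    [x1 * theta.cos() - x2 * theta.sin(), x1 * theta.sin() + x2 * theta.cos()],
    dim=-1,
)
assert torch.allclose(out, ref, atol=1e-6)

The interleaved branch (the previous _apply_rotary_embedding) instead pairs x[..., ::2] with x[..., 1::2], so the two conventions order the rotated components differently; the use_hf_rope flag, passed through rope.params in the diff above, selects between them.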

examples/models/llama/tests/test_static_attention.py

Lines changed: 29 additions & 0 deletions

@@ -43,6 +43,35 @@ def test_without_cache(self):
         )
         self.assertTrue(torch.isclose(y, expected, rtol=1e-3).all())

+    def test_hf_rope_without_cache(self):
+        config = ModelArgs(
+            dim=64,
+            n_heads=4,
+            n_kv_heads=2,
+            max_seq_len=8,
+            use_hf_rope=True,
+        )
+        layer_id = 0
+        rope = Rope(config)
+        attn_mha = AttentionMHA(config, layer_id, rope).eval()
+        static_attn = StaticAttention(config, layer_id, rope).eval()
+        static_attn.load_weights_from_attention_mha(attn_mha)
+
+        x = torch.rand(1, config.max_seq_len, config.dim)
+        freqs_cos, freqs_sin = rope.get_freqs(None, config.max_seq_len)
+        expected, _ = attn_mha(x, freqs_cos, freqs_sin)
+        mask = torch.triu(
+            torch.full((1, config.max_seq_len, config.max_seq_len), float("-inf")),
+            diagonal=1,
+        )
+        y, _ = static_attn(
+            x,
+            freqs_cos.unsqueeze(0),
+            freqs_sin.unsqueeze(0),
+            mask=mask,
+        )
+        self.assertTrue(torch.isclose(y, expected, rtol=1e-3).all())
+
     def test_with_cache(self):
         config = ModelArgs(
             dim=64,

extension/flat_tensor/targets.bzl

Lines changed: 3 additions & 1 deletion

@@ -9,13 +9,15 @@ def define_common_targets():
        exported_headers = ["flat_tensor_data_map.h"],
        deps = [
            "//executorch/extension/flat_tensor/serialize:generated_headers",
-            "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
            "//executorch/runtime/core:core",
            "//executorch/runtime/core:evalue",
            "//executorch/runtime/core:named_data_map",
            "//executorch/runtime/core/exec_aten:lib",
            "//executorch/runtime/core/exec_aten/util:tensor_util",
        ],
+        exported_deps = [
+            "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
+        ],
        visibility = [
            "//executorch/...",
        ],
