|
1 |
| -# Copyright (c) Meta Platforms, Inc. and affiliates. |
2 |
| -# |
3 |
| -# This source code is licensed under both the MIT license found in the |
4 |
| -# LICENSE-MIT file in the root directory of this source tree and the Apache |
5 |
| -# License, Version 2.0 found in the LICENSE-APACHE file in the root directory |
6 |
| -# of this source tree. |
7 |
| - |
8 | 1 | load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
|
9 | 2 | load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
|
10 | 3 | load(
|
@@ -106,7 +99,7 @@ def define_op_library(name, compiler_flags, deps):
|
106 | 99 | # pragma unroll fails with -Os, don't need to warn us and
|
107 | 100 | # fail Werror builds; see https://godbolt.org/z/zvf85vTsr
|
108 | 101 | "-Wno-pass-failed",
|
109 |
| - ] + get_compiler_optimization_flags(), |
| 102 | + ] + compiler_flags + get_compiler_optimization_flags(), |
110 | 103 | deps = [
|
111 | 104 | "//executorch/runtime/kernel:kernel_includes",
|
112 | 105 | ] + augmented_deps + get_vec_deps(),
|
@@ -144,3 +137,124 @@ def define_op_target(name, compiler_flags, deps):
|
144 | 137 | compiler_flags = compiler_flags,
|
145 | 138 | deps = deps,
|
146 | 139 | )
|
| 140 | + |
# OPTIMIZED_ATEN_OPS: a tuple of op_target(...) entries, one per op name
# (op_add, op_bmm, op_div, ..., op_where). Each entry carries a `name` and,
# optionally, `deps` and `compiler_flags`; entries with no extra arguments
# (op_exp, op_sigmoid, op_neg) pass only the name.
#
# Labels beginning with ":" are package-relative; labels beginning with
# "//executorch/..." are absolute.
#
# NOTE(review): `op_target` is not defined or loaded in the visible part of
# this file, and nothing visible consumes this tuple -- confirm both against
# the full file before relying on this description.
| 141 | +OPTIMIZED_ATEN_OPS = ( |
| 142 | + op_target( |
| 143 | + name = "op_add", |
| 144 | + deps = [ |
| 145 | + ":binary_ops", |
| 146 | + ":add_sub_impl", |
| 147 | + "//executorch/kernels/portable/cpu:scalar_utils", |
| 148 | + "//executorch/kernels/portable/cpu/util:broadcast_util", |
| 149 | + ], |
| 150 | + ), |
| 151 | + op_target( |
| 152 | + name = "op_bmm", |
| 153 | + deps = [ |
| 154 | + "//executorch/kernels/optimized:libblas", |
| 155 | + "//executorch/kernels/portable/cpu/util:matmul_ops_util", |
| 156 | + ], |
| 157 | + ), |
| 158 | + op_target( |
| 159 | + name = "op_div", |
| 160 | + deps = [ |
| 161 | + ":binary_ops", |
| 162 | + "//executorch/kernels/portable/cpu:scalar_utils", |
| 163 | + "//executorch/kernels/portable/cpu/util:broadcast_util", |
| 164 | + ], |
| 165 | + ), |
| 166 | + op_target( |
| 167 | + name = "op_elu", |
| 168 | + deps = [ |
| 169 | + "//executorch/extension/threadpool:threadpool", |
| 170 | + "//executorch/kernels/portable/cpu:scalar_utils", |
| 171 | + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", |
| 172 | + ], |
| 173 | + ), |
| 174 | + op_target(name = "op_exp"), |
# The two FFT ops are the only entries that set compiler_flags: the list is
# empty when runtime.is_oss is truthy, otherwise it adds the two -Wno-* flags
# below. Both entries use the same flags and the same single dep.
| 175 | + op_target( |
| 176 | + name = "op_fft_c2r", |
| 177 | + compiler_flags = [] if runtime.is_oss else [ |
| 178 | + "-Wno-global-constructors", |
| 179 | + "-Wno-shadow", |
| 180 | + ], |
| 181 | + deps = [":fft_utils"], |
| 182 | + ), |
| 183 | + op_target( |
| 184 | + name = "op_fft_r2c", |
| 185 | + compiler_flags = [] if runtime.is_oss else [ |
| 186 | + "-Wno-global-constructors", |
| 187 | + "-Wno-shadow", |
| 188 | + ], |
| 189 | + deps = [":fft_utils"], |
| 190 | + ), |
# NOTE(review): op_sigmoid is the one entry out of alphabetical order (it sits
# between op_fft_r2c and op_gelu). Confirm whether tuple order matters to
# consumers before moving it.
| 191 | + op_target(name = "op_sigmoid"), |
| 192 | + op_target( |
| 193 | + name = "op_gelu", |
| 194 | + deps = [ |
| 195 | + "//executorch/kernels/portable/cpu/util:activation_ops_util", |
| 196 | + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", |
| 197 | + ], |
| 198 | + ), |
| 199 | + op_target( |
| 200 | + name = "op_le", |
| 201 | + deps = [ |
| 202 | + "//executorch/kernels/portable/cpu:scalar_utils", |
| 203 | + "//executorch/kernels/portable/cpu/util:broadcast_util", |
| 204 | + ], |
| 205 | + ), |
# op_linear, op_mm (below) and op_bmm (above) list the same two deps:
# libblas and matmul_ops_util.
| 206 | + op_target( |
| 207 | + name = "op_linear", |
| 208 | + deps = [ |
| 209 | + "//executorch/kernels/optimized:libblas", |
| 210 | + "//executorch/kernels/portable/cpu/util:matmul_ops_util", |
| 211 | + ], |
| 212 | + ), |
| 213 | + op_target( |
| 214 | + name = "op_log_softmax", |
| 215 | + deps = [ |
| 216 | + "//executorch/kernels/portable/cpu/util:activation_ops_util", |
| 217 | + "//executorch/runtime/core/portable_type/c10/c10:aten_headers_for_executorch", |
| 218 | + ], |
| 219 | + ), |
| 220 | + op_target( |
| 221 | + name = "op_mm", |
| 222 | + deps = [ |
| 223 | + "//executorch/kernels/optimized:libblas", |
| 224 | + "//executorch/kernels/portable/cpu/util:matmul_ops_util", |
| 225 | + ], |
| 226 | + ), |
| 227 | + op_target( |
| 228 | + name = "op_mul", |
| 229 | + deps = [ |
| 230 | + ":binary_ops", |
| 231 | + "//executorch/kernels/portable/cpu:scalar_utils", |
| 232 | + "//executorch/kernels/portable/cpu/util:broadcast_util", |
| 233 | + "//executorch/runtime/core/exec_aten/util:tensor_util", |
| 234 | + ], |
| 235 | + ), |
| 236 | + op_target( |
| 237 | + name = "op_native_layer_norm", |
| 238 | + deps = [ |
| 239 | + ":moments_utils", |
| 240 | + "//executorch/kernels/portable/cpu/util:normalization_ops_util", |
| 241 | + ], |
| 242 | + ), |
| 243 | + op_target(name = "op_neg"), |
# op_sub lists exactly the same deps as op_add, including ":add_sub_impl".
| 244 | + op_target( |
| 245 | + name = "op_sub", |
| 246 | + deps = [ |
| 247 | + ":binary_ops", |
| 248 | + ":add_sub_impl", |
| 249 | + "//executorch/kernels/portable/cpu:scalar_utils", |
| 250 | + "//executorch/kernels/portable/cpu/util:broadcast_util", |
| 251 | + ], |
| 252 | + ), |
| 253 | + op_target( |
| 254 | + name = "op_where", |
| 255 | + deps = [ |
| 256 | + "//executorch/extension/threadpool:threadpool", |
| 257 | + "//executorch/kernels/portable/cpu/util:elementwise_util", |
| 258 | + ], |
| 259 | + ), |
| 260 | +) |
0 commit comments