
Commit 1720f2f

Add vectorized_math.h (#11204)

Set of math functions that work on both scalars and at::vec::Vectorized, to be used in #9432.

1 parent c2aa614 commit 1720f2f
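To make the intent concrete: the header exposes each function under a single name in executorch::math, which resolves to the std:: scalar overload for plain arithmetic types and to an at::vec::Vectorized overload when PyTorch headers are available. A minimal usage sketch, assuming a build with ET_USE_PYTORCH_HEADERS defined and the ATen headers on the include path (the printf is illustrative only):

#include <executorch/kernels/portable/cpu/util/vectorized_math.h>

#include <ATen/cpu/vec/vec.h>

#include <cstdio>

int main() {
  // Scalar path: pulled into executorch::math from <cmath> via `using std::exp;`.
  const float scalar = executorch::math::exp(1.0f);

  // Vector path: dispatches to at::vec::Vectorized<float>::exp().
  const auto vec = executorch::math::exp(at::vec::Vectorized<float>(1.0f));

  __at_align__ float lanes[at::vec::Vectorized<float>::size()];
  vec.store(lanes);
  std::printf("scalar=%f lane0=%f\n", scalar, lanes[0]); // both ~2.718282
  return 0;
}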

9 files changed: +278, -23 lines changed

.lintrunner.toml

Lines changed: 2 additions & 0 deletions
@@ -271,6 +271,8 @@ exclude_patterns = [
     'examples/**',
     'exir/verification/bindings.cpp',
     'extension/**',
+    # Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
+    'kernels/portable/cpu/util/vectorized_math.h',
     'kernels/optimized/**',
     'runtime/core/exec_aten/**',
     # Want to be able to keep c10 in sync with PyTorch core.

kernels/portable/cpu/util/targets.bzl

Lines changed: 10 additions & 0 deletions
@@ -307,6 +307,16 @@ def define_common_targets():
         ],
     )
 
+    runtime.cxx_library(
+        name = "vectorized_math",
+        exported_headers = ["vectorized_math.h"],
+        visibility = ["//executorch/..."],
+        exported_deps = [
+            "//executorch/runtime/core/portable_type:portable_type",
+            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
+        ],
+    )
+
     # Utility functions that can be used by operators that perform reduction
     for aten_mode in get_aten_mode_options():
         suffix = "_aten" if aten_mode else ""

kernels/portable/cpu/util/test/CMakeLists.txt

Lines changed: 6 additions & 10 deletions
@@ -4,26 +4,22 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# @generated by test/utils/generate_gtest_cmakelists.py
-#
-# This file should be formatted with
-# ~~~
-# cmake-format -i CMakeLists.txt
-# ~~~
-# It should also be cmake-lint clean.
-#
-
 cmake_minimum_required(VERSION 3.19)
 
 set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
 
 include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake)
+include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 
 set(_test_srcs broadcast_indexes_range_test.cpp broadcast_test.cpp
-               reduce_test.cpp
+               reduce_test.cpp vectorized_math_test.cpp
 )
 
 et_cxx_test(
   kernels_portable_cpu_util_test SOURCES ${_test_srcs} EXTRA_LIBS
   portable_kernels portable_ops_lib
 )
+
+find_package_torch_headers()
+target_include_directories(kernels_portable_cpu_util_test PRIVATE ${TORCH_INCLUDE_DIRS})
+target_compile_definitions(kernels_portable_cpu_util_test PRIVATE ET_USE_PYTORCH_HEADERS)

kernels/portable/cpu/util/test/targets.bzl

Lines changed: 11 additions & 0 deletions
@@ -32,3 +32,14 @@ def define_common_targets():
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     )
+
+    # this test requires ET_USE_PYTORCH_HEADERS, which doesn't work in OSS Buck.
+    if not runtime.is_oss:
+        runtime.cxx_test(
+            name = "vectorized_math_test",
+            srcs = ["vectorized_math_test.cpp"],
+            deps = [
+                "//executorch/kernels/portable/cpu/util:vectorized_math",
+                "//executorch/runtime/core/portable_type/c10/c10:c10",
+            ],
+        )
kernels/portable/cpu/util/test/vectorized_math_test.cpp

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/portable/cpu/util/vectorized_math.h>
+
+#include <c10/util/irange.h>
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+#ifndef ET_USE_PYTORCH_HEADERS
+#error "This test requires ET_USE_PYTORCH_HEADERS!"
+#endif // ET_USE_PYTORCH_HEADERS
+
+TEST(VectorizedMathTest, BasicUnary) {
+  __at_align__ float result_floats[at::vec::Vectorized<float>::size()] = {0};
+  const auto x_vec = at::vec::Vectorized<float>::arange(0, 1);
+  const auto result_vec = executorch::math::exp(x_vec);
+  result_vec.store(result_floats);
+  for (const auto ii : c10::irange(at::vec::Vectorized<float>::size())) {
+    EXPECT_FLOAT_EQ(result_floats[ii], std::exp(ii));
+  }
+}
+
+namespace {
+template <typename T>
+void test_unary_t_to_float() {
+  __at_align__ float result_floats[at::vec::Vectorized<T>::size()] = {0};
+  const auto x_vec = at::vec::Vectorized<T>::arange(0, 1);
+  const auto result_vec = executorch::math::exp(x_vec);
+  static_assert(decltype(result_vec)::size() >= at::vec::Vectorized<T>::size());
+  result_vec.store(result_floats, at::vec::Vectorized<T>::size());
+  for (const auto ii : c10::irange(at::vec::Vectorized<T>::size())) {
+    EXPECT_EQ(result_floats[ii], std::exp((float)ii)) << ii;
+  }
+}
+
+} // namespace
+
+TEST(VectorizedMathTest, UnaryInt16ToFloat) {
+  test_unary_t_to_float<std::uint16_t>();
+}
+
+TEST(VectorizedMathTest, UnaryInt32ToFloat) {
+  test_unary_t_to_float<std::uint32_t>();
+}
+
+TEST(VectorizedMathTest, UnaryInt64ToFloat) {
+  test_unary_t_to_float<std::uint64_t>();
+}
+
+TEST(VectorizedMathTest, BasicBinary) {
+  __at_align__ float result_floats[at::vec::Vectorized<float>::size()] = {0};
+  const auto x_vec = at::vec::Vectorized<float>::arange(0, 1);
+  const auto y_vec = at::vec::Vectorized<float>(2);
+  const auto result_vec = executorch::math::pow(x_vec, y_vec);
+  result_vec.store(result_floats);
+  for (const auto ii : c10::irange(at::vec::Vectorized<float>::size())) {
+    EXPECT_FLOAT_EQ(result_floats[ii], std::pow((float)ii, 2.0f));
+  }
+}
+
+namespace {
+template <typename T>
+void test_binary_t_to_float() {
+  __at_align__ float result_floats[at::vec::Vectorized<T>::size()] = {0};
+  const auto x_vec = at::vec::Vectorized<T>::arange(0, 1);
+  const auto y_vec = at::vec::Vectorized<T>(2);
+  const auto result_vec = executorch::math::pow(x_vec, y_vec);
+  static_assert(decltype(result_vec)::size() >= at::vec::Vectorized<T>::size());
+  result_vec.store(result_floats, at::vec::Vectorized<T>::size());
+  for (const auto ii : c10::irange(at::vec::Vectorized<T>::size())) {
+    EXPECT_EQ(result_floats[ii], std::pow((float)ii, 2.0f)) << ii;
+  }
+}
+
+TEST(VectorizedMathTest, BinaryInt16ToFloat) {
+  test_binary_t_to_float<std::int16_t>();
+}
+
+TEST(VectorizedMathTest, BinaryInt32ToFloat) {
+  test_binary_t_to_float<std::int32_t>();
+}
+
+TEST(VectorizedMathTest, BinaryInt64ToFloat) {
+  test_binary_t_to_float<std::uint64_t>();
+}
+
+} // namespace
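A note on the conversion path these templated helpers exercise: an integer input vector is widened lane-by-lane to float before the math function runs, so the result is an at::vec::VectorizedN<float, N> with at least as many lanes as the input, and the two-argument store writes back only the lanes that correspond to input elements. A standalone sketch of the same behavior outside gtest (hypothetical program, again assuming ET_USE_PYTORCH_HEADERS and the ATen vector headers):

#include <executorch/kernels/portable/cpu/util/vectorized_math.h>

#include <ATen/cpu/vec/vec.h>

#include <cstdint>
#include <cstdio>

int main() {
  using VecI32 = at::vec::Vectorized<std::int32_t>;
  const auto x = VecI32::arange(0, 1); // lanes 0, 1, 2, ...
  // exp() on an integer vector yields float lanes, one per input lane.
  const auto y = executorch::math::exp(x);
  __at_align__ float out[VecI32::size()];
  y.store(out, VecI32::size()); // partial store: only the input's lane count
  for (int i = 0; i < VecI32::size(); ++i) {
    std::printf("exp(%d) ~= %f\n", i, out[i]);
  }
  return 0;
}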
kernels/portable/cpu/util/vectorized_math.h

Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
+
+#ifdef ET_USE_PYTORCH_HEADERS
+#include <ATen/cpu/vec/vec.h>
+#endif // ET_USE_PYTORCH_HEADERS
+
+#include <iostream>
+#include <type_traits>
+
+#ifdef ET_USE_PYTORCH_HEADERS
+namespace executorch {
+inline namespace math {
+namespace internal {
+template <typename T>
+auto convert_to_vectorized_n_of_float(at::vec::Vectorized<T> vec) {
+  static constexpr auto float_vec_size = at::vec::Vectorized<float>::size();
+  static constexpr auto t_vec_size = at::vec::Vectorized<T>::size();
+  static constexpr auto result_size =
+      t_vec_size < float_vec_size ? 1 : t_vec_size / float_vec_size;
+  static_assert(result_size >= 1);
+  return at::vec::convert<float, result_size, T, 1, /*keep=*/true>(
+      at::vec::VectorizedN<T, 1>(vec));
+}
+} // namespace internal
+} // namespace math
+} // namespace executorch
+#endif // ET_USE_PYTORCH_HEADERS
+
+#define _ET_INTERNAL_STD_MATH_FUNC(name) \
+  namespace executorch {                 \
+  inline namespace math {                \
+  using std::name;                       \
+  }                                      \
+  } // namespace executorch
+
+#ifdef ET_USE_PYTORCH_HEADERS
+/**
+ * Internal-usage macro for making a vectorized variant of a unary
+ * function available in the executorch::math namespace.
+ */
+#define ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(func_name)                \
+  namespace executorch {                                                  \
+  inline namespace math {                                                 \
+  template <typename T>                                                   \
+  auto func_name(at::vec::Vectorized<T> vec) {                            \
+    if constexpr (!::executorch::runtime::is_floating_point<T>::value) {  \
+      return internal::convert_to_vectorized_n_of_float(vec).func_name(); \
+    } else {                                                              \
+      return vec.func_name();                                             \
+    }                                                                     \
+  }                                                                       \
+  }                                                                       \
+  }
+
+#define ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(func_name)                  \
+  namespace executorch {                                                     \
+  inline namespace math {                                                    \
+  template <typename T>                                                      \
+  auto func_name(at::vec::Vectorized<T> vec0, at::vec::Vectorized<T> vec1) { \
+    if constexpr (!::executorch::runtime::is_floating_point<T>::value) {     \
+      const auto vec_float0 =                                                \
+          internal::convert_to_vectorized_n_of_float(vec0);                  \
+      const auto vec_float1 =                                                \
+          internal::convert_to_vectorized_n_of_float(vec1);                  \
+      return vec_float0.func_name(vec_float1);                               \
+    } else {                                                                 \
+      return vec0.func_name(vec1);                                           \
+    }                                                                        \
+  }                                                                          \
+  }                                                                          \
+  }
+
+/**
+ * Internal-usage macro for making a C++ standard library
+ * floating-point function and a vectorized variant of it available in
+ * the executorch::math namespace. Should be used with functions where the
+ * corresponding operator is a "float op" in TensorIterator parlance
+ * (i.e., uses something like build_borrowing_binary_float_op()),
+ * because it converts non-floating-point arguments to floating point.
+ */
+#define ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(func_name) \
+  _ET_INTERNAL_STD_MATH_FUNC(func_name)                        \
+  ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(func_name)
+
+#define ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(func_name) \
+  _ET_INTERNAL_STD_MATH_FUNC(func_name)                         \
+  ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(func_name)
+
+#else // ET_USE_PYTORCH_HEADERS
+#define ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(name) \
+  _ET_INTERNAL_STD_MATH_FUNC(name)
+#define ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(name) \
+  _ET_INTERNAL_STD_MATH_FUNC(name)
+#endif // ET_USE_PYTORCH_HEADERS
+
+// To simplify client code, we provide coverage for a bunch of float ops (the
+// same ones listed in ATen vml.h) here.
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(abs)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(acos)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(asin)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(atan)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(ceil)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cos)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cosh)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(erf)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(erfc)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(exp)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(expm1)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(floor)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log10)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log1p)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log2)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sin)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sinh)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sqrt)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(round)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(tan)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(tanh)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(trunc)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(lgamma)
+
+#ifdef ET_USE_PYTORCH_HEADERS
+ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(rsqrt)
+#endif // ET_USE_PYTORCH_HEADERS
+
+namespace executorch {
+inline namespace math {
+template <typename T, std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
+T rsqrt(T x) {
+  return T(1) / std::sqrt(x);
+}
+} // namespace math
+} // namespace executorch
+
+ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(atan2)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(fmod)
+ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(pow)
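The point of exposing one name for both overloads is that a kernel's inner loop can use the same spelling for its vectorized body and its scalar tail. A hedged sketch of how a client kernel might use the header (exp_kernel is a hypothetical helper, not part of this commit):

#include <executorch/kernels/portable/cpu/util/vectorized_math.h>

#ifdef ET_USE_PYTORCH_HEADERS
#include <ATen/cpu/vec/vec.h>
#endif

#include <cstddef>

// Hypothetical elementwise kernel: exp over a float buffer. The same
// executorch::math::exp spelling serves the vector body and the scalar tail.
void exp_kernel(const float* in, float* out, std::size_t n) {
  std::size_t i = 0;
#ifdef ET_USE_PYTORCH_HEADERS
  using Vec = at::vec::Vectorized<float>;
  for (; i + Vec::size() <= n; i += Vec::size()) {
    executorch::math::exp(Vec::loadu(in + i)).store(out + i);
  }
#endif
  for (; i < n; ++i) { // scalar tail (or the whole loop without ATen headers)
    out[i] = executorch::math::exp(in[i]);
  }
}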

runtime/core/portable_type/c10/c10/targets.bzl

Lines changed: 5 additions & 1 deletion
@@ -53,7 +53,11 @@ def define_common_targets():
     runtime.cxx_library(
         name = "aten_headers_for_executorch",
         srcs = [],
-        visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS"],
+        visibility = [
+            "//executorch/kernels/optimized/...",
+            "//executorch/kernels/portable/cpu/util/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
         exported_deps = select({
             "DEFAULT": [],
             "ovr_config//cpu:arm64": [

runtime/core/portable_type/targets.bzl

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ def define_common_targets():
     visibility = [
         "//executorch/backends/...",
         "//executorch/extension/fb/dynamic_shim/...",
+        "//executorch/kernels/portable/cpu/...",
         "//executorch/runtime/core/exec_aten/...",
         "//executorch/runtime/core/portable_type/test/...",
     ],

test/utils/OSSTestConfig.json

Lines changed: 0 additions & 12 deletions
@@ -68,18 +68,6 @@
       "extension_threadpool"
     ]
   },
-  {
-    "directory": "kernels/portable/cpu/util/test",
-    "sources": [
-      "broadcast_indexes_range_test.cpp",
-      "broadcast_test.cpp",
-      "reduce_test.cpp"
-    ],
-    "additional_libs": [
-      "portable_kernels",
-      "portable_ops_lib"
-    ]
-  },
   {
     "directory": "runtime/core/portable_type/test",
     "sources": [
