Skip to content

Commit 1be369c

Browse files
committed
[libc++] <experimental/simd> Add copy functions for class simd/simd_mask
1 parent 6a6f9bf commit 1be369c

File tree

7 files changed

+346
-0
lines changed

7 files changed

+346
-0
lines changed

libcxx/docs/Status/ParallelismProjects.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Section,Description,Dependencies,Assignee,Complete
2424
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd generate constructor <https://reviews.llvm.org/D159442>`_", None, Yin Zhang, |Complete|
2525
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd load constructor <https://github.com/llvm/llvm-project/pull/76610>`_", None, Yin Zhang, |Complete|
2626
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd subscript operators <https://github.com/llvm/llvm-project/pull/68960>`_", None, Yin Zhang, |Complete|
27+
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd copy functions <https://github.com/llvm/llvm-project/pull/78935>`_", None, Yin Zhang, |Complete|
2728
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "Class template simd implementation", None, Yin Zhang, |In Progress|
2829
| `[parallel.simd.nonmembers] <https://wg21.link/N4808>`_, "simd non-member operations", None, Yin Zhang, |In Progress|
2930
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`Class template simd_mask declaration and alias <https://reviews.llvm.org/D144362>`_", [parallel.simd.abi], Yin Zhang, |Complete|
@@ -33,5 +34,6 @@ Section,Description,Dependencies,Assignee,Complete
3334
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask implicit type conversion constructor <https://github.com/llvm/llvm-project/pull/71132>`_", None, Yin Zhang, |Complete|
3435
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask load constructor <https://github.com/llvm/llvm-project/pull/76610>`_", None, Yin Zhang, |Complete|
3536
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask subscript operators <https://github.com/llvm/llvm-project/pull/68960>`_", None, Yin Zhang, |Complete|
37+
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask copy functions <https://github.com/llvm/llvm-project/pull/78935>`_", None, Yin Zhang, |Complete|
3638
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "Class template simd_mask implementation", None, Yin Zhang, |In Progress|
3739
| `[parallel.simd.mask.nonmembers] <https://wg21.link/N4808>`_, "simd_mask non-member operations", None, Yin Zhang, |In Progress|

libcxx/include/experimental/__simd/scalar.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ struct __simd_operations<_Tp, simd_abi::__scalar> {
6262
static _LIBCPP_HIDE_FROM_ABI void __load(_SimdStorage& __s, const _Up* __mem) noexcept {
6363
__s.__data = static_cast<_Tp>(__mem[0]);
6464
}
65+
66+
template <class _Up>
67+
static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept {
68+
*__mem = static_cast<_Up>(__s.__data);
69+
}
6570
};
6671

6772
template <class _Tp>
@@ -71,6 +76,8 @@ struct __mask_operations<_Tp, simd_abi::__scalar> {
7176
// Returns a mask storage brace-initialized from the single bool `__v`.
static _LIBCPP_HIDE_FROM_ABI _MaskStorage __broadcast(bool __v) noexcept { return {__v}; }
7277

7378
// Reads one bool from `__mem` and assigns it to the storage's `__data` member.
static _LIBCPP_HIDE_FROM_ABI void __load(_MaskStorage& __s, const bool* __mem) noexcept {
  __s.__data = *__mem;
}
79+
80+
// Writes the storage's `__data` member to the bool that `__mem` points at.
static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept {
  *__mem = __s.__data;
}
7481
};
7582

7683
} // namespace parallelism_v2

libcxx/include/experimental/__simd/simd.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,19 @@ class simd {
7070
_Impl::__load(__s_, _Flags::template __apply<simd>(__mem));
7171
}
7272

73+
// copy functions
74+
template <class _Up, class _Flags>
75+
_LIBCPP_HIDE_FROM_ABI enable_if_t<__is_vectorizable_v<_Up> && is_simd_flag_type_v<_Flags>>
76+
copy_from(const _Up* __mem, _Flags) {
77+
_Impl::__load(__s_, _Flags::template __apply<simd>(__mem));
78+
}
79+
80+
template <class _Up, class _Flags>
81+
_LIBCPP_HIDE_FROM_ABI enable_if_t<__is_vectorizable_v<_Up> && is_simd_flag_type_v<_Flags>>
82+
copy_to(_Up* __mem, _Flags) const {
83+
_Impl::__store(__s_, _Flags::template __apply<simd>(__mem));
84+
}
85+
7386
// scalar access [simd.subscr]
7487
// Mutable element access: returns a `reference` object constructed from the
// storage `__s_` and the index `__i`. This function does not range-check `__i`.
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
7588
// Read-only element access: returns, by value, whatever `__s_.__get(__i)` yields.
// This function does not range-check `__i`.
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }

libcxx/include/experimental/__simd/simd_mask.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ class simd_mask {
5858
_Impl::__load(__s_, _Flags::template __apply<simd_mask>(__mem));
5959
}
6060

61+
// copy functions
62+
template <class _Flags>
63+
_LIBCPP_HIDE_FROM_ABI enable_if_t<is_simd_flag_type_v<_Flags>> copy_from(const value_type* __mem, _Flags) {
64+
_Impl::__load(__s_, _Flags::template __apply<simd_mask>(__mem));
65+
}
66+
67+
template <class _Flags>
68+
_LIBCPP_HIDE_FROM_ABI enable_if_t<is_simd_flag_type_v<_Flags>> copy_to(value_type* __mem, _Flags) const {
69+
_Impl::__store(__s_, _Flags::template __apply<simd_mask>(__mem));
70+
}
71+
6172
// scalar access [simd.mask.subscr]
6273
// Mutable mask-element access: returns a `reference` object constructed from
// the storage `__s_` and the index `__i`. This function does not range-check `__i`.
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
6374
// Read-only mask-element access: returns, by value, whatever `__s_.__get(__i)` yields.
// This function does not range-check `__i`.
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }

libcxx/include/experimental/__simd/vec_ext.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ struct __simd_operations<_Tp, simd_abi::__vec_ext<_Np>> {
8080
for (size_t __i = 0; __i < _Np; __i++)
8181
__s.__data[__i] = static_cast<_Tp>(__mem[__i]);
8282
}
83+
84+
template <class _Up>
85+
static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept {
86+
for (size_t __i = 0; __i < _Np; __i++)
87+
__mem[__i] = static_cast<_Up>(__s.__data[__i]);
88+
}
8389
};
8490

8591
template <class _Tp, int _Np>
@@ -99,6 +105,11 @@ struct __mask_operations<_Tp, simd_abi::__vec_ext<_Np>> {
99105
for (size_t __i = 0; __i < _Np; __i++)
100106
__s.__data[__i] = experimental::__set_all_bits<_Tp>(__mem[__i]);
101107
}
108+
109+
// Writes the mask held in `__s` out as `_Np` bools: `__mem[i]` receives
// `__s.__data[i]` converted to bool with static_cast.
static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept {
  for (size_t __lane = 0; __lane < _Np; ++__lane) {
    __mem[__lane] = static_cast<bool>(__s.__data[__lane]);
  }
}
102113
};
103114

104115
} // namespace parallelism_v2
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14
10+
11+
// <experimental/simd>
12+
//
13+
// [simd.class]
14+
// template<class U, class Flags> void copy_from(const U* mem, Flags);
15+
// template<class U, class Flags> void copy_to(U* mem, Flags) const;
16+
17+
#include "../test_utils.h"
18+
19+
namespace ex = std::experimental::parallelism_v2;
20+
21+
template <class T, class SimdAbi, std::size_t array_size>
22+
struct ElementAlignedCopyFromHelper {
23+
template <class U>
24+
void operator()() const {
25+
constexpr std::size_t alignas_size = alignof(U);
26+
alignas(alignas_size) U buffer[array_size];
27+
for (size_t i = 0; i < array_size; ++i)
28+
buffer[i] = static_cast<U>(i);
29+
ex::simd<T, SimdAbi> origin_simd;
30+
origin_simd.copy_from(buffer, ex::element_aligned_tag());
31+
assert_simd_values_equal(origin_simd, buffer);
32+
}
33+
};
34+
35+
template <class T, class SimdAbi, std::size_t array_size>
36+
struct VectorAlignedCopyFromHelper {
37+
template <class U>
38+
void operator()() const {
39+
constexpr std::size_t alignas_size = ex::memory_alignment_v<ex::simd<T, SimdAbi>, U>;
40+
alignas(alignas_size) U buffer[array_size];
41+
for (size_t i = 0; i < array_size; ++i)
42+
buffer[i] = static_cast<U>(i);
43+
ex::simd<T, SimdAbi> origin_simd;
44+
origin_simd.copy_from(buffer, ex::vector_aligned_tag());
45+
assert_simd_values_equal(origin_simd, buffer);
46+
}
47+
};
48+
49+
template <class T, class SimdAbi, std::size_t array_size>
50+
struct OveralignedCopyFromHelper {
51+
template <class U>
52+
void operator()() const {
53+
constexpr std::size_t alignas_size = bit_ceil(sizeof(U) + 1);
54+
alignas(alignas_size) U buffer[array_size];
55+
for (size_t i = 0; i < array_size; ++i)
56+
buffer[i] = static_cast<U>(i);
57+
ex::simd<T, SimdAbi> origin_simd;
58+
origin_simd.copy_from(buffer, ex::overaligned_tag<alignas_size>());
59+
assert_simd_values_equal(origin_simd, buffer);
60+
}
61+
};
62+
63+
template <class T, std::size_t>
64+
struct CheckSimdCopyFrom {
65+
template <class SimdAbi>
66+
void operator()() {
67+
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;
68+
69+
types::for_each(simd_test_types(), ElementAlignedCopyFromHelper<T, SimdAbi, array_size>());
70+
types::for_each(simd_test_types(), VectorAlignedCopyFromHelper<T, SimdAbi, array_size>());
71+
types::for_each(simd_test_types(), OveralignedCopyFromHelper<T, SimdAbi, array_size>());
72+
}
73+
};
74+
75+
template <class T, class SimdAbi, std::size_t array_size>
76+
struct ElementAlignedCopyToHelper {
77+
template <class U>
78+
void operator()() const {
79+
constexpr std::size_t alignas_size = alignof(U);
80+
alignas(alignas_size) U buffer[array_size];
81+
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
82+
origin_simd.copy_to(buffer, ex::element_aligned_tag());
83+
assert_simd_values_equal(origin_simd, buffer);
84+
}
85+
};
86+
87+
template <class T, class SimdAbi, std::size_t array_size>
88+
struct VectorAlignedCopyToHelper {
89+
template <class U>
90+
void operator()() const {
91+
constexpr std::size_t alignas_size = ex::memory_alignment_v<ex::simd<T, SimdAbi>, U>;
92+
alignas(alignas_size) U buffer[array_size];
93+
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
94+
origin_simd.copy_to(buffer, ex::vector_aligned_tag());
95+
assert_simd_values_equal(origin_simd, buffer);
96+
}
97+
};
98+
99+
template <class T, class SimdAbi, std::size_t array_size>
100+
struct OveralignedCopyToHelper {
101+
template <class U>
102+
void operator()() const {
103+
constexpr std::size_t alignas_size = bit_ceil(sizeof(U) + 1);
104+
alignas(alignas_size) U buffer[array_size];
105+
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
106+
origin_simd.copy_to(buffer, ex::overaligned_tag<alignas_size>());
107+
assert_simd_values_equal(origin_simd, buffer);
108+
}
109+
};
110+
111+
template <class T, std::size_t>
112+
struct CheckSimdCopyTo {
113+
template <class SimdAbi>
114+
void operator()() {
115+
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;
116+
117+
types::for_each(simd_test_types(), ElementAlignedCopyToHelper<T, SimdAbi, array_size>());
118+
types::for_each(simd_test_types(), VectorAlignedCopyToHelper<T, SimdAbi, array_size>());
119+
types::for_each(simd_test_types(), OveralignedCopyToHelper<T, SimdAbi, array_size>());
120+
}
121+
};
122+
123+
template <class U, class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
124+
struct has_copy_from : std::false_type {};
125+
126+
template <class U, class T, class Flags, class SimdAbi>
127+
struct has_copy_from<U,
128+
T,
129+
Flags,
130+
SimdAbi,
131+
std::void_t<decltype(std::declval<ex::simd<T, SimdAbi>>().copy_from(
132+
std::declval<const U*>(), std::declval<Flags>()))>> : std::true_type {};
133+
134+
template <class U, class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
135+
struct has_copy_to : std::false_type {};
136+
137+
template <class U, class T, class Flags, class SimdAbi>
138+
struct has_copy_to<
139+
U,
140+
T,
141+
Flags,
142+
SimdAbi,
143+
std::void_t<decltype(std::declval<ex::simd<T, SimdAbi>>().copy_to(std::declval<U*>(), std::declval<Flags>()))>>
144+
: std::true_type {};
145+
146+
template <class T, std::size_t>
147+
struct CheckSimdCopyTraits {
148+
template <class SimdAbi>
149+
void operator()() {
150+
// These functions shall not participate in overload resolution unless
151+
// is_simd_flag_type_v<Flags> is true, and
152+
// U is a vectorizable type.
153+
static_assert(has_copy_from<int, T, ex::element_aligned_tag, SimdAbi>::value);
154+
static_assert(has_copy_to<int, T, ex::element_aligned_tag, SimdAbi>::value);
155+
156+
// is_simd_flag_type_v<Flags> is false
157+
static_assert(!has_copy_from<int, T, T, SimdAbi>::value);
158+
static_assert(!has_copy_to<int, T, T, SimdAbi>::value);
159+
static_assert(!has_copy_from<int, T, SimdAbi, SimdAbi>::value);
160+
static_assert(!has_copy_to<int, T, SimdAbi, SimdAbi>::value);
161+
162+
// U is not a vectorizable type.
163+
static_assert(!has_copy_from<SimdAbi, T, ex::element_aligned_tag, SimdAbi>::value);
164+
static_assert(!has_copy_to<SimdAbi, T, ex::element_aligned_tag, SimdAbi>::value);
165+
static_assert(!has_copy_from<ex::element_aligned_tag, T, ex::element_aligned_tag, SimdAbi>::value);
166+
static_assert(!has_copy_to<ex::element_aligned_tag, T, ex::element_aligned_tag, SimdAbi>::value);
167+
}
168+
};
169+
170+
// Test entry point: passes the copy_from checks, the copy_to checks and the
// overload-resolution trait checks, in that order, to test_all_simd_abi.
int main(int, char**) {
  test_all_simd_abi<CheckSimdCopyFrom>();
  test_all_simd_abi<CheckSimdCopyTo>();
  test_all_simd_abi<CheckSimdCopyTraits>();

  return 0;
}

0 commit comments

Comments
 (0)