Skip to content

[libc++] <experimental/simd> Add copy functions for class simd/simd_mask #78935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libcxx/docs/Status/ParallelismProjects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Section,Description,Dependencies,Assignee,Complete
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd generate constructor <https://reviews.llvm.org/D159442>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd load constructor <https://github.com/llvm/llvm-project/pull/76610>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd subscript operators <https://github.com/llvm/llvm-project/pull/68960>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd copy functions <https://github.com/llvm/llvm-project/pull/78935>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "Class template simd implementation", None, Yin Zhang, |In Progress|
| `[parallel.simd.nonmembers] <https://wg21.link/N4808>`_, "simd non-member operations", None, Yin Zhang, |In Progress|
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`Class template simd_mask declaration and alias <https://reviews.llvm.org/D144362>`_", [parallel.simd.abi], Yin Zhang, |Complete|
Expand All @@ -33,5 +34,6 @@ Section,Description,Dependencies,Assignee,Complete
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask implicit type conversion constructor <https://github.com/llvm/llvm-project/pull/71132>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask load constructor <https://github.com/llvm/llvm-project/pull/76610>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask subscript operators <https://github.com/llvm/llvm-project/pull/68960>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask copy functions <https://github.com/llvm/llvm-project/pull/78935>`_", None, Yin Zhang, |Complete|
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "Class template simd_mask implementation", None, Yin Zhang, |In Progress|
| `[parallel.simd.mask.nonmembers] <https://wg21.link/N4808>`_, "simd_mask non-member operations", None, Yin Zhang, |In Progress|
7 changes: 7 additions & 0 deletions libcxx/include/experimental/__simd/scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ struct __simd_operations<_Tp, simd_abi::__scalar> {
static _LIBCPP_HIDE_FROM_ABI void __load(_SimdStorage& __s, const _Up* __mem) noexcept {
__s.__data = static_cast<_Tp>(__mem[0]);
}

// Writes the single lane held in __s to the first element of __mem,
// converting it to the destination element type _Up.
template <class _Up>
static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept {
  __mem[0] = static_cast<_Up>(__s.__data);
}
};

template <class _Tp>
Expand All @@ -71,6 +76,8 @@ struct __mask_operations<_Tp, simd_abi::__scalar> {
// Returns mask storage list-initialized from the single boolean __v.
static _LIBCPP_HIDE_FROM_ABI _MaskStorage __broadcast(bool __v) noexcept { return {__v}; }

// Reads the first boolean at __mem into the single lane of __s.
static _LIBCPP_HIDE_FROM_ABI void __load(_MaskStorage& __s, const bool* __mem) noexcept {
  __s.__data = *__mem;
}

// Writes the single lane of __s to the first boolean at __mem.
static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept {
  *__mem = __s.__data;
}
};

} // namespace parallelism_v2
Expand Down
11 changes: 11 additions & 0 deletions libcxx/include/experimental/__simd/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ class simd {
_Impl::__load(__s_, _Flags::template __apply<simd>(__mem));
}

// copy functions
// Overwrites this simd's storage with values loaded from __mem.
// Only participates in overload resolution when _Up is vectorizable and
// _Flags is a simd flag type; the pointer is first passed through
// _Flags::__apply before being handed to the ABI-specific __load.
template <class _Up, class _Flags, enable_if_t<__is_vectorizable_v<_Up> && is_simd_flag_type_v<_Flags>, int> = 0>
_LIBCPP_HIDE_FROM_ABI void copy_from(const _Up* __mem, _Flags) {
  auto __flagged_mem = _Flags::template __apply<simd>(__mem);
  _Impl::__load(__s_, __flagged_mem);
}

// Stores this simd's storage to __mem without modifying the simd.
// Only participates in overload resolution when _Up is vectorizable and
// _Flags is a simd flag type; the pointer is first passed through
// _Flags::__apply before being handed to the ABI-specific __store.
template <class _Up, class _Flags, enable_if_t<__is_vectorizable_v<_Up> && is_simd_flag_type_v<_Flags>, int> = 0>
_LIBCPP_HIDE_FROM_ABI void copy_to(_Up* __mem, _Flags) const {
  auto __flagged_mem = _Flags::template __apply<simd>(__mem);
  _Impl::__store(__s_, __flagged_mem);
}

// scalar access [simd.subscr]
// Non-const access: returns a `reference` object constructed from the storage and the index __i.
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
// Const access: returns the value that the storage's __get reports for index __i.
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }
Expand Down
11 changes: 11 additions & 0 deletions libcxx/include/experimental/__simd/simd_mask.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ class simd_mask {
_Impl::__load(__s_, _Flags::template __apply<simd_mask>(__mem));
}

// copy functions
// Overwrites this mask's storage with values loaded from __mem.
// Only participates in overload resolution when _Flags is a simd flag type;
// the pointer is first passed through _Flags::__apply before being handed
// to the ABI-specific __load.
template <class _Flags, enable_if_t<is_simd_flag_type_v<_Flags>, int> = 0>
_LIBCPP_HIDE_FROM_ABI void copy_from(const value_type* __mem, _Flags) {
  auto __flagged_mem = _Flags::template __apply<simd_mask>(__mem);
  _Impl::__load(__s_, __flagged_mem);
}

// Stores this mask's storage to __mem without modifying the mask.
// Only participates in overload resolution when _Flags is a simd flag type;
// the pointer is first passed through _Flags::__apply before being handed
// to the ABI-specific __store.
template <class _Flags, enable_if_t<is_simd_flag_type_v<_Flags>, int> = 0>
_LIBCPP_HIDE_FROM_ABI void copy_to(value_type* __mem, _Flags) const {
  auto __flagged_mem = _Flags::template __apply<simd_mask>(__mem);
  _Impl::__store(__s_, __flagged_mem);
}

// scalar access [simd.mask.subscr]
// Non-const access: returns a `reference` object constructed from the storage and the index __i.
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
// Const access: returns the value that the storage's __get reports for index __i.
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }
Expand Down
11 changes: 11 additions & 0 deletions libcxx/include/experimental/__simd/vec_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ struct __simd_operations<_Tp, simd_abi::__vec_ext<_Np>> {
for (size_t __i = 0; __i < _Np; __i++)
__s.__data[__i] = static_cast<_Tp>(__mem[__i]);
}

// Writes lanes 0 .. _Np-1 of __s to __mem[0] .. __mem[_Np-1],
// converting each lane to the destination element type _Up.
template <class _Up>
static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept {
  for (size_t __lane = 0; __lane < _Np; ++__lane) {
    __mem[__lane] = static_cast<_Up>(__s.__data[__lane]);
  }
}
};

template <class _Tp, int _Np>
Expand All @@ -99,6 +105,11 @@ struct __mask_operations<_Tp, simd_abi::__vec_ext<_Np>> {
for (size_t __i = 0; __i < _Np; __i++)
__s.__data[__i] = experimental::__set_all_bits<_Tp>(__mem[__i]);
}

// Writes lanes 0 .. _Np-1 of __s to __mem[0] .. __mem[_Np-1],
// converting each stored lane value to bool.
static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept {
  for (size_t __lane = 0; __lane < _Np; ++__lane) {
    __mem[__lane] = static_cast<bool>(__s.__data[__lane]);
  }
}
};

} // namespace parallelism_v2
Expand Down
173 changes: 173 additions & 0 deletions libcxx/test/std/experimental/simd/simd.class/simd_copy.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14

// FIXME: Fatal error with following targets (remove XFAIL when fixed):
// Pass-by-value arguments with alignment greater than register width are not supported.
// XFAIL: target=powerpc{{.*}}-ibm-aix7.2.5.7

// <experimental/simd>
//
// [simd.class]
// template<class U, class Flags> void copy_from(const U* mem, Flags);
// template<class U, class Flags> void copy_to(U* mem, Flags) const;

#include "../test_utils.h"

namespace ex = std::experimental::parallelism_v2;

template <class T, class SimdAbi, std::size_t array_size>
struct ElementAlignedCopyFromHelper {
template <class U>
void operator()() const {
U buffer[array_size];
for (size_t i = 0; i < array_size; ++i)
buffer[i] = static_cast<U>(i);
ex::simd<T, SimdAbi> origin_simd;
origin_simd.copy_from(buffer, ex::element_aligned_tag());
assert_simd_values_equal(origin_simd, buffer);
}
};

template <class T, class SimdAbi, std::size_t array_size>
struct VectorAlignedCopyFromHelper {
template <class U>
void operator()() const {
alignas(ex::memory_alignment_v<ex::simd<T, SimdAbi>, U>) U buffer[array_size];
for (size_t i = 0; i < array_size; ++i)
buffer[i] = static_cast<U>(i);
ex::simd<T, SimdAbi> origin_simd;
origin_simd.copy_from(buffer, ex::vector_aligned_tag());
assert_simd_values_equal(origin_simd, buffer);
}
};

template <class T, class SimdAbi, std::size_t array_size>
struct OveralignedCopyFromHelper {
template <class U>
void operator()() const {
alignas(bit_ceil(sizeof(U) + 1)) U buffer[array_size];
for (size_t i = 0; i < array_size; ++i)
buffer[i] = static_cast<U>(i);
ex::simd<T, SimdAbi> origin_simd;
origin_simd.copy_from(buffer, ex::overaligned_tag<bit_ceil(sizeof(U) + 1)>());
assert_simd_values_equal(origin_simd, buffer);
}
};

template <class T, std::size_t>
struct CheckSimdCopyFrom {
template <class SimdAbi>
void operator()() {
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;

types::for_each(simd_test_types(), ElementAlignedCopyFromHelper<T, SimdAbi, array_size>());
types::for_each(simd_test_types(), VectorAlignedCopyFromHelper<T, SimdAbi, array_size>());
types::for_each(simd_test_types(), OveralignedCopyFromHelper<T, SimdAbi, array_size>());
}
};

template <class T, class SimdAbi, std::size_t array_size>
struct ElementAlignedCopyToHelper {
template <class U>
void operator()() const {
U buffer[array_size];
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
origin_simd.copy_to(buffer, ex::element_aligned_tag());
assert_simd_values_equal(origin_simd, buffer);
}
};

template <class T, class SimdAbi, std::size_t array_size>
struct VectorAlignedCopyToHelper {
template <class U>
void operator()() const {
alignas(ex::memory_alignment_v<ex::simd<T, SimdAbi>, U>) U buffer[array_size];
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
origin_simd.copy_to(buffer, ex::vector_aligned_tag());
assert_simd_values_equal(origin_simd, buffer);
}
};

template <class T, class SimdAbi, std::size_t array_size>
struct OveralignedCopyToHelper {
template <class U>
void operator()() const {
alignas(bit_ceil(sizeof(U) + 1)) U buffer[array_size];
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
origin_simd.copy_to(buffer, ex::overaligned_tag<bit_ceil(sizeof(U) + 1)>());
assert_simd_values_equal(origin_simd, buffer);
}
};

template <class T, std::size_t>
struct CheckSimdCopyTo {
template <class SimdAbi>
void operator()() {
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;

types::for_each(simd_test_types(), ElementAlignedCopyToHelper<T, SimdAbi, array_size>());
types::for_each(simd_test_types(), VectorAlignedCopyToHelper<T, SimdAbi, array_size>());
types::for_each(simd_test_types(), OveralignedCopyToHelper<T, SimdAbi, array_size>());
}
};

template <class U, class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
struct has_copy_from : std::false_type {};

template <class U, class T, class Flags, class SimdAbi>
struct has_copy_from<U,
T,
Flags,
SimdAbi,
std::void_t<decltype(std::declval<ex::simd<T, SimdAbi>>().copy_from(
std::declval<const U*>(), std::declval<Flags>()))>> : std::true_type {};

template <class U, class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
struct has_copy_to : std::false_type {};

template <class U, class T, class Flags, class SimdAbi>
struct has_copy_to<
U,
T,
Flags,
SimdAbi,
std::void_t<decltype(std::declval<ex::simd<T, SimdAbi>>().copy_to(std::declval<U*>(), std::declval<Flags>()))>>
: std::true_type {};

template <class T, std::size_t>
struct CheckSimdCopyTraits {
template <class SimdAbi>
void operator()() {
// These functions shall not participate in overload resolution unless
// is_simd_flag_type_v<Flags> is true, and
// U is a vectorizable type.
static_assert(has_copy_from<int, T, ex::element_aligned_tag, SimdAbi>::value);
static_assert(has_copy_to<int, T, ex::element_aligned_tag, SimdAbi>::value);

// is_simd_flag_type_v<Flags> is false
static_assert(!has_copy_from<int, T, T, SimdAbi>::value);
static_assert(!has_copy_to<int, T, T, SimdAbi>::value);
static_assert(!has_copy_from<int, T, SimdAbi, SimdAbi>::value);
static_assert(!has_copy_to<int, T, SimdAbi, SimdAbi>::value);

// U is not a vectorizable type.
static_assert(!has_copy_from<SimdAbi, T, ex::element_aligned_tag, SimdAbi>::value);
static_assert(!has_copy_to<SimdAbi, T, ex::element_aligned_tag, SimdAbi>::value);
static_assert(!has_copy_from<ex::element_aligned_tag, T, ex::element_aligned_tag, SimdAbi>::value);
static_assert(!has_copy_to<ex::element_aligned_tag, T, ex::element_aligned_tag, SimdAbi>::value);
}
};

// Test entry point: hands each checker template to test_all_simd_abi (from test_utils.h).
int main(int, char**) {
test_all_simd_abi<CheckSimdCopyFrom>();   // copy_from with each alignment flag
test_all_simd_abi<CheckSimdCopyTo>();     // copy_to with each alignment flag
test_all_simd_abi<CheckSimdCopyTraits>(); // overload-resolution constraints (static_asserts)
return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14

// <experimental/simd>
//
// [simd.class]
// template<class Flags> void copy_from(const value_type* mem, Flags);
// template<class Flags> void copy_to(value_type* mem, Flags) const;

#include "../test_utils.h"

namespace ex = std::experimental::parallelism_v2;

template <class T, std::size_t>
struct CheckSimdMaskCopyFrom {
template <class SimdAbi>
void operator()() {
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;

// element aligned tag
constexpr std::size_t element_alignas_size = alignof(bool);
alignas(element_alignas_size) bool element_buffer[array_size];
for (size_t i = 0; i < array_size; ++i)
element_buffer[i] = static_cast<bool>(i % 2);
ex::simd_mask<T, SimdAbi> element_mask;
element_mask.copy_from(element_buffer, ex::element_aligned_tag());
assert_simd_mask_values_equal(element_mask, element_buffer);

// vector aligned tag
constexpr std::size_t vector_alignas_size = ex::memory_alignment_v<ex::simd_mask<T, SimdAbi>>;
alignas(vector_alignas_size) bool vector_buffer[array_size];
for (size_t i = 0; i < array_size; ++i)
vector_buffer[i] = static_cast<bool>(i % 2);
ex::simd_mask<T, SimdAbi> vector_mask;
vector_mask.copy_from(vector_buffer, ex::vector_aligned_tag());
assert_simd_mask_values_equal(vector_mask, vector_buffer);

// overaligned tag
constexpr std::size_t over_alignas_size = bit_ceil(sizeof(bool) + 1);
alignas(over_alignas_size) bool overaligned_buffer[array_size];
for (size_t i = 0; i < array_size; ++i)
overaligned_buffer[i] = static_cast<bool>(i % 2);
ex::simd_mask<T, SimdAbi> overaligned_mask;
overaligned_mask.copy_from(overaligned_buffer, ex::overaligned_tag<over_alignas_size>());
assert_simd_mask_values_equal(overaligned_mask, overaligned_buffer);
}
};

template <class T, std::size_t>
struct CheckSimdMaskCopyTo {
template <class SimdAbi>
void operator()() {
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;

// element aligned tag
constexpr std::size_t element_alignas_size = alignof(bool);
alignas(element_alignas_size) bool element_buffer[array_size];
ex::simd_mask<T, SimdAbi> element_mask(true);
element_mask.copy_to(element_buffer, ex::element_aligned_tag());
assert_simd_mask_values_equal(element_mask, element_buffer);

// vector aligned tag
constexpr std::size_t vector_alignas_size = ex::memory_alignment_v<ex::simd_mask<T, SimdAbi>>;
alignas(vector_alignas_size) bool vector_buffer[array_size];
ex::simd_mask<T, SimdAbi> vector_mask(false);
vector_mask.copy_to(vector_buffer, ex::vector_aligned_tag());
assert_simd_mask_values_equal(vector_mask, vector_buffer);

// overaligned tag
constexpr std::size_t over_alignas_size = bit_ceil(sizeof(bool) + 1);
alignas(over_alignas_size) bool overaligned_buffer[array_size];
ex::simd_mask<T, SimdAbi> overaligned_mask(true);
overaligned_mask.copy_to(overaligned_buffer, ex::overaligned_tag<over_alignas_size>());
assert_simd_mask_values_equal(overaligned_mask, overaligned_buffer);
}
};

template <class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
struct has_copy_from : std::false_type {};

template <class T, class Flags, class SimdAbi>
struct has_copy_from<T,
Flags,
SimdAbi,
std::void_t<decltype(std::declval<ex::simd_mask<T, SimdAbi>>().copy_from(
std::declval<const bool*>(), std::declval<Flags>()))>> : std::true_type {};

template <class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
struct has_copy_to : std::false_type {};

template <class T, class Flags, class SimdAbi>
struct has_copy_to<T,
Flags,
SimdAbi,
std::void_t<decltype(std::declval<ex::simd_mask<T, SimdAbi>>().copy_to(
std::declval<bool*>(), std::declval<Flags>()))>> : std::true_type {};

template <class T, std::size_t>
struct CheckSimdMaskCopyTraits {
template <class SimdAbi>
void operator()() {
// These functions shall not participate in overload resolution unless
// is_simd_flag_type_v<Flags> is true
static_assert(has_copy_from<T, ex::element_aligned_tag, SimdAbi>::value);
static_assert(has_copy_to<T, ex::element_aligned_tag, SimdAbi>::value);

// is_simd_flag_type_v<Flags> is false
static_assert(!has_copy_from<T, T, SimdAbi>::value);
static_assert(!has_copy_to<T, T, SimdAbi>::value);
static_assert(!has_copy_from<T, SimdAbi, SimdAbi>::value);
static_assert(!has_copy_to<T, SimdAbi, SimdAbi>::value);
}
};

// Test entry point: hands each checker template to test_all_simd_abi (from test_utils.h).
int main(int, char**) {
test_all_simd_abi<CheckSimdMaskCopyFrom>();   // copy_from with each alignment flag
test_all_simd_abi<CheckSimdMaskCopyTo>();     // copy_to with each alignment flag
test_all_simd_abi<CheckSimdMaskCopyTraits>(); // overload-resolution constraint (static_asserts)
return 0;
}