Skip to content

[libc++] Vectorize std::mismatch with trivially equality comparable types #87716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libcxx/include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ set(files
__ios/fpos.h
__iterator/access.h
__iterator/advance.h
__iterator/aliasing_iterator.h
__iterator/back_insert_iterator.h
__iterator/bounded_iter.h
__iterator/common_iterator.h
Expand Down
52 changes: 40 additions & 12 deletions libcxx/include/__algorithm/mismatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <__algorithm/unwrap_iter.h>
#include <__config>
#include <__functional/identity.h>
#include <__iterator/aliasing_iterator.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_constant_evaluated.h>
Expand Down Expand Up @@ -55,18 +56,13 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro

#if _LIBCPP_VECTORIZE_ALGORITHMS

template <class _Tp,
class _Pred,
class _Proj1,
class _Proj2,
__enable_if_t<is_integral<_Tp>::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> &&
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
int> = 0>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
template <class _Iter>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
__mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
using __value_type = __iter_value_type<_Iter>;
constexpr size_t __unroll_count = 4;
constexpr size_t __vec_size = __native_vector_size<_Tp>;
using __vec = __simd_vector<_Tp, __vec_size>;
constexpr size_t __vec_size = __native_vector_size<__value_type>;
using __vec = __simd_vector<__value_type, __vec_size>;

if (!__libcpp_is_constant_evaluated()) {
auto __orig_first1 = __first1;
Expand Down Expand Up @@ -116,9 +112,41 @@ __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __
} // else loop over the elements individually
}

return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
__equal_to __pred;
__identity __proj;
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj, __proj);
}

// Fast path of __mismatch for ranges of integral elements that are passed as raw pointers.
//
// The overload only participates in overload resolution when
//  - _Tp is an integral type,
//  - _Pred desugars to an equality comparison of two _Tp objects, and
//  - both projections are the identity projection.
// The predicate and the projections are therefore not needed and are left unnamed; the whole job is handed to
// std::__mismatch_vectorized.
//
// Returns the pair of pointers produced by std::__mismatch_vectorized for the two ranges.
template <class _Tp,
          class _Pred,
          class _Proj1,
          class _Proj2,
          __enable_if_t<is_integral<_Tp>::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> &&
                            __is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
                        int> = 0>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred&, _Proj1&, _Proj2&) {
  pair<_Tp*, _Tp*> __mismatch_point = std::__mismatch_vectorized(__first1, __last1, __first2);
  return __mismatch_point;
}

// Fast path of __mismatch for non-integral element types whose equality can be decided from their bytes.
//
// The overload only participates in overload resolution when
//  - _Tp is not an integral type (those are handled by the integral overload),
//  - _Pred desugars to an equality comparison of two _Tp objects,
//  - both projections are the identity projection,
//  - _Tp has the size and alignment of a fixed-width integer (__can_map_to_integer_v), and
//  - _Tp is trivially equality comparable.
//
// At run time the pointers are wrapped in __aliasing_iterator so that the elements are read as the unsigned integer
// type of the same size, which lets std::__mismatch_vectorized work on them. The resulting iterators are unwrapped
// again before returning.
template <class _Tp,
          class _Pred,
          class _Proj1,
          class _Proj2,
          __enable_if_t<!is_integral<_Tp>::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> &&
                            __is_identity<_Proj1>::value && __is_identity<_Proj2>::value &&
                            __can_map_to_integer_v<_Tp> && __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value,
                        int> = 0>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
  if (!__libcpp_is_constant_evaluated()) {
    // Reinterpret every element as the same-sized integer and compare those instead.
    using _IntegerIter = __aliasing_iterator<_Tp*, __get_as_integer_type_t<_Tp>>;
    auto __found =
        std::__mismatch_vectorized(_IntegerIter(__first1), _IntegerIter(__last1), _IntegerIter(__first2));
    return {__found.first.__base(), __found.second.__base()};
  }

  // During constant evaluation the aliasing iterator is not used; compare the elements one by one with the
  // caller-supplied predicate and projections.
  return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
}
#endif // _LIBCPP_VECTORIZE_ALGORITHMS

template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
Expand Down
34 changes: 31 additions & 3 deletions libcxx/include/__algorithm/simd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

// True when objects of type _Tp can be viewed as a single fixed-width integer: the size of _Tp has to be 1, 2, 4 or 8
// bytes and its alignment has to be equal to its size.
template <class _Tp>
inline constexpr bool __can_map_to_integer_v =
    (sizeof(_Tp) == 1 || sizeof(_Tp) == 2 || sizeof(_Tp) == 4 || sizeof(_Tp) == 8) && alignof(_Tp) == sizeof(_Tp);

// Maps a size in bytes to the unsigned fixed-width integer type of exactly that size. The primary template is only
// declared, so there is no mapping for sizes other than 1, 2, 4 and 8.
template <size_t _TypeSize>
struct __get_as_integer_type_impl;

// clang-format off
template <> struct __get_as_integer_type_impl<1> { using type = uint8_t; };
template <> struct __get_as_integer_type_impl<2> { using type = uint16_t; };
template <> struct __get_as_integer_type_impl<4> { using type = uint32_t; };
template <> struct __get_as_integer_type_impl<8> { using type = uint64_t; };
// clang-format on

// The unsigned integer type that has the same size as _Tp.
template <class _Tp>
using __get_as_integer_type_t = typename __get_as_integer_type_impl<sizeof(_Tp)>::type;

// This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance
// in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms
// as far as benchmarks are concerned.
Expand Down Expand Up @@ -80,10 +108,10 @@ template <class _VecT>
using __simd_vector_underlying_type_t = decltype(std::__simd_vector_underlying_type_impl(_VecT{}));

// This isn't inlined without always_inline when loading chars.
template <class _VecT, class _Tp>
_LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(const _Tp* __ptr) noexcept {
template <class _VecT, class _Iter>
_LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept {
return [=]<size_t... _Indices>(index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept {
return _VecT{__ptr[_Indices]...};
return _VecT{__iter[_Indices]...};
}(make_index_sequence<__simd_vector_size_v<_VecT>>{});
}

Expand Down
127 changes: 127 additions & 0 deletions libcxx/include/__iterator/aliasing_iterator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP___ITERATOR_ALIASING_ITERATOR_H
#define _LIBCPP___ITERATOR_ALIASING_ITERATOR_H

#include <__config>
#include <__iterator/iterator_traits.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/is_trivial.h>
#include <cstddef>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif

// This iterator wrapper is used to type-pun an iterator to return a different type. This is done without UB by not
// actually punning the type, but instead inspecting the object representation of the base type and copying that into
// an instance of the alias type. For that reason the alias type has to be trivial. The alias is returned as a prvalue
// when dereferencing the iterator, since it is temporary storage. This wrapper is used to vectorize some algorithms.

_LIBCPP_BEGIN_NAMESPACE_STD

// Holder for the aliasing iterator. _BaseIter is the iterator that is wrapped and _Alias is the type that is
// produced when the wrapping iterator is dereferenced.
template <class _BaseIter, class _Alias>
struct __aliasing_iterator_wrapper {
  // Random access iterator that moves exactly like _BaseIter, but yields the bytes of the element it points at as
  // an object of type _Alias.
  class __iterator {
    // The wrapped iterator. Every positional operation below is forwarded to it.
    _BaseIter __base_ = nullptr;

    using __iter_traits     = iterator_traits<_BaseIter>;
    using __base_value_type = typename __iter_traits::value_type;

    static_assert(__has_random_access_iterator_category<_BaseIter>::value,
                  "The base iterator has to be a random access iterator!");

  public:
    using iterator_category = random_access_iterator_tag;
    using value_type        = _Alias;
    using difference_type   = ptrdiff_t;
    // NOTE(review): operator* and operator[] return value_type by value, not a reference - confirm that nothing
    // relies on `reference` matching the dereference type.
    using reference = value_type&;
    using pointer   = value_type*;

    // The bytes of the base element are copied into a value_type object, so that type has to be trivial and of the
    // same size as the element.
    static_assert(is_trivial<value_type>::value);
    static_assert(sizeof(__base_value_type) == sizeof(value_type));

    // Construction

    _LIBCPP_HIDE_FROM_ABI __iterator() = default;
    _LIBCPP_HIDE_FROM_ABI __iterator(_BaseIter __base) _NOEXCEPT : __base_(__base) {}

    // Element access

    // Returns the wrapped iterator at its current position.
    _LIBCPP_HIDE_FROM_ABI _BaseIter __base() const _NOEXCEPT { return __base_; }

    // Copies the object representation of the current element into a fresh _Alias and returns it by value.
    _LIBCPP_HIDE_FROM_ABI _Alias operator*() const _NOEXCEPT {
      _Alias __result;
      __builtin_memcpy(&__result, std::__to_address(__base_), sizeof(_Alias));
      return __result;
    }

    // Same as dereferencing a copy of this iterator that has been moved by __n elements.
    _LIBCPP_HIDE_FROM_ABI value_type operator[](difference_type __n) const _NOEXCEPT {
      const __iterator __target = *this + __n;
      return *__target;
    }

    // Single-step movement

    _LIBCPP_HIDE_FROM_ABI __iterator& operator++() _NOEXCEPT {
      ++__base_;
      return *this;
    }

    _LIBCPP_HIDE_FROM_ABI __iterator operator++(int) _NOEXCEPT {
      __iterator __previous = *this;
      ++__base_;
      return __previous;
    }

    _LIBCPP_HIDE_FROM_ABI __iterator& operator--() _NOEXCEPT {
      --__base_;
      return *this;
    }

    _LIBCPP_HIDE_FROM_ABI __iterator operator--(int) _NOEXCEPT {
      __iterator __previous = *this;
      --__base_;
      return __previous;
    }

    // Movement by an arbitrary distance

    _LIBCPP_HIDE_FROM_ABI __iterator& operator+=(difference_type __n) _NOEXCEPT {
      __base_ += __n;
      return *this;
    }

    _LIBCPP_HIDE_FROM_ABI __iterator& operator-=(difference_type __n) _NOEXCEPT {
      __base_ -= __n;
      return *this;
    }

    _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(__iterator __iter, difference_type __n) _NOEXCEPT {
      return __iterator(__iter.__base_ + __n);
    }

    _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(difference_type __n, __iterator __iter) _NOEXCEPT {
      return __iterator(__n + __iter.__base_);
    }

    _LIBCPP_HIDE_FROM_ABI friend __iterator operator-(__iterator __iter, difference_type __n) _NOEXCEPT {
      return __iterator(__iter.__base_ - __n);
    }

    // Distance between two iterators, measured in elements of the base iterator.
    _LIBCPP_HIDE_FROM_ABI friend difference_type operator-(__iterator __lhs, __iterator __rhs) _NOEXCEPT {
      return __lhs.__base_ - __rhs.__base_;
    }

    // Comparison: two iterators are equal exactly when their base iterators are equal.

    _LIBCPP_HIDE_FROM_ABI friend bool operator==(const __iterator& __lhs, const __iterator& __rhs) _NOEXCEPT {
      return __lhs.__base_ == __rhs.__base_;
    }

    _LIBCPP_HIDE_FROM_ABI friend bool operator!=(const __iterator& __lhs, const __iterator& __rhs) _NOEXCEPT {
      return __lhs.__base_ != __rhs.__base_;
    }
  };
};

// Shorthand for the iterator type nested inside __aliasing_iterator_wrapper<_BaseT, _Alias>. The iterator is a
// non-template member class of the wrapper rather than a class template of its own, and is reached through this alias.
// This is required to avoid ADL instantiations on _BaseT
template <class _BaseT, class _Alias>
using __aliasing_iterator = typename __aliasing_iterator_wrapper<_BaseT, _Alias>::__iterator;

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___ITERATOR_ALIASING_ITERATOR_H
2 changes: 2 additions & 0 deletions libcxx/include/__type_traits/is_equality_comparable.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ struct __is_equality_comparable<_Tp, _Up, __void_t<decltype(std::declval<_Tp>()
// pointers that don't have the same type (ignoring cv-qualifiers): pointers to virtual bases are equality comparable,
// but don't have the same bit-pattern. An exception to this is comparing to a void-pointer. There the bit-pattern is
// always compared.
// objects with padding bytes: since objects with padding bytes may compare equal, even though their object
// representation may not be equivalent.

template <class _Tp, class _Up, class = void>
struct __libcpp_is_trivially_equality_comparable_impl : false_type {};
Expand Down
2 changes: 2 additions & 0 deletions libcxx/include/module.modulemap
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ module std_private_algorithm_minmax_element [system
module std_private_algorithm_mismatch [system] {
header "__algorithm/mismatch.h"
export std_private_algorithm_simd_utils
export std_private_iterator_aliasing_iterator
}
module std_private_algorithm_move [system] { header "__algorithm/move.h" }
module std_private_algorithm_move_backward [system] { header "__algorithm/move_backward.h" }
Expand Down Expand Up @@ -1390,6 +1391,7 @@ module std_private_iosfwd_streambuf_fwd [system] { header "__fwd/streambuf.h" }

module std_private_iterator_access [system] { header "__iterator/access.h" }
module std_private_iterator_advance [system] { header "__iterator/advance.h" }
module std_private_iterator_aliasing_iterator [system] { header "__iterator/aliasing_iterator.h" }
module std_private_iterator_back_insert_iterator [system] { header "__iterator/back_insert_iterator.h" }
module std_private_iterator_bounded_iter [system] { header "__iterator/bounded_iter.h" }
module std_private_iterator_common_iterator [system] { header "__iterator/common_iterator.h" }
Expand Down
45 changes: 45 additions & 0 deletions libcxx/test/libcxx/iterators/aliasing_iterator.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// ADDITIONAL_COMPILE_FLAGS(clang): -Wprivate-header

#include <__iterator/aliasing_iterator.h>
#include <cassert>

// An int wrapper with user-provided copy operations and destructor, i.e. a type that is deliberately not trivial
// while still having the size of a plain int.
struct NonTrivial {
  int i_;

  // Implicit on purpose so that arrays can be initialized from plain integers.
  NonTrivial(int value) : i_(value) {}
  NonTrivial(const NonTrivial& rhs) : i_(rhs.i_) {}
  ~NonTrivial() {}

  NonTrivial& operator=(const NonTrivial& rhs) {
    this->i_ = rhs.i_;
    return *this;
  }
};

int main(int, char**) {
{
NonTrivial arr[] = {1, 2, 3, 4};
std::__aliasing_iterator<NonTrivial*, int> iter(arr);

assert(*iter == 1);
assert(iter[0] == 1);
assert(iter[1] == 2);
++iter;
assert(*iter == 2);
assert(iter[-1] == 1);
assert(iter.__base() == arr + 1);
assert(iter == iter);
assert(iter != (iter + 1));
}

return 0;
}
Loading
Loading