Skip to content

[SYCL] Fix corner case when using short or char with exclusive scan #10270

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion sycl/include/sycl/group_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ template <template <typename> typename F> struct get_scalar_binary_op<F<void>> {
using type = F<void>;
};

// ---- is_max_or_min
template <typename T> struct is_max_or_min : std::false_type {};
template <typename T>
struct is_max_or_min<sycl::maximum<T>> : std::true_type {};
template <typename T>
struct is_max_or_min<sycl::minimum<T>> : std::true_type {};

// ---- identity_for_ga_op
// the group algorithms support std::complex, limited to sycl::plus operation
// get the correct identity for group algorithm operation.
Expand Down Expand Up @@ -678,8 +685,34 @@ exclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) {
sycl::detail::ExtractMask(sycl::detail::GetMask(g))[0]);
}
#endif
return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
// For the first work item in the group, we cannot return the result
// of calc when T is a signed char or short type and the
// BinaryOperation is maximum or minimum. calc uses SPIRV group
// collective instructions, which only operate on 32 or 64 bit
// integers. So, when using calc with a short or char type, the
// argument is converted to a 32 bit integer, the 32 bit group
// operation is performed, and then converted back to the original
// short or char type. For an exclusive scan, the first work item
// returns the identity for the supplied operation. However, the
// identity of a 32 bit signed integer maximum or minimum when
// converted to a signed char or short does not correspond to the
// identity of a signed char or short maximum or minimum. For
// example, the identity of a signed 32 bit maximum is
// INT_MIN=-2**31, and when converted to a signed char, results in
// 0. However, the identity of a signed char maximum is
// SCHAR_MIN=-2**7. Therefore, we need the following check to
// circumvent this issue.
auto res = sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
g, typename sycl::detail::GroupOpTag<T>::type(), x, binary_op);
if constexpr ((std::is_same_v<signed char, T> ||
std::is_same_v<signed short, T> ||
(std::is_signed_v<char> && std::is_same_v<char, T>)) &&
detail::is_max_or_min<BinaryOperation>::value) {
auto local_id = sycl::detail::get_local_linear_id(g);
if (local_id == 0)
return sycl::known_identity_v<BinaryOperation, T>;
}
return res;
#else
(void)g;
throw sycl::exception(make_error_code(errc::feature_not_supported),
Expand Down
61 changes: 61 additions & 0 deletions sycl/test-e2e/Regression/exclusive-scan-char-short.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// This test ensures the result computed by exclusive_scan_over_group
// for the first work item when given a short or char argument with
// the maximum or minimum operator is computed correctly.
#include <numeric>
#include <sycl/sycl.hpp>

using namespace sycl;
queue q;
int cur_test = 0;
int n_fail = 0;

template <typename T, typename OpT> void test() {
auto op = OpT();
auto init = sycl::known_identity_v<decltype(op), T>;
auto *p = malloc_shared<T>(1, q);
*p = 0;
T ref;
std::exclusive_scan(p, p + 1, &ref, init, op);
range r(1);
q.parallel_for(nd_range(r, r), [=](nd_item<1> it) {
auto g = it.get_group();
*p = exclusive_scan_over_group(g, *p, op);
}).wait();

if (*p != ref) {
std::cout << "test " << cur_test << " fail\n";
std::cout << "got: " << int(*p) << "\n";
std::cout << "expected: " << int(ref) << "\n\n";
++n_fail;
}
++cur_test;
free(p, q);
}

int main() {
test<char, sycl::maximum<char>>();
test<signed char, sycl::maximum<signed char>>();
test<unsigned char, sycl::maximum<unsigned char>>();
test<char, sycl::maximum<void>>();
test<signed char, sycl::maximum<void>>();
test<unsigned char, sycl::maximum<void>>();
test<short, sycl::maximum<short>>();
test<unsigned short, sycl::maximum<unsigned short>>();
test<short, sycl::maximum<void>>();
test<unsigned short, sycl::maximum<void>>();

test<char, sycl::minimum<char>>();
test<signed char, sycl::minimum<signed char>>();
test<unsigned char, sycl::minimum<unsigned char>>();
test<char, sycl::minimum<void>>();
test<signed char, sycl::minimum<void>>();
test<unsigned char, sycl::minimum<void>>();
test<short, sycl::minimum<short>>();
test<unsigned short, sycl::minimum<unsigned short>>();
test<short, sycl::minimum<void>>();
test<unsigned short, sycl::minimum<void>>();
return n_fail != 0;
}