Skip to content

Commit a5760aa

Browse files
authored
[SYCL] Fix performance regression in parallel_for when using the id class (#5385)
In #5118, a conversion was added for user-defined types that are implicitly convertible from sycl::item. It caused a regression when the sycl::id type is used as the kernel argument, because with that change the argument type is transformed to sycl::item even though this is not needed. That is why the sycl::id case should be checked explicitly.
1 parent ab5de79 commit a5760aa

File tree

3 files changed

+44
-5
lines changed

3 files changed

+44
-5
lines changed

sycl/include/CL/sycl/detail/stl_type_traits.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ struct is_output_iterator<T, output_iterator_requirements<T>> {
7878
static constexpr bool value = true;
7979
};
8080

81+
template <typename T, typename U>
82+
inline constexpr bool is_same_v = std::is_same<T, U>::value;
83+
84+
template <typename T, typename U>
85+
inline constexpr bool is_convertible_v = std::is_convertible<T, U>::value;
86+
8187
} // namespace detail
8288
} // namespace sycl
8389
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/include/CL/sycl/handler.hpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -917,11 +917,14 @@ class __SYCL_EXPORT handler {
917917
}
918918

919919
template <int Dims, typename LambdaArgType> struct TransformUserItemType {
920-
using type = typename std::conditional<
921-
std::is_convertible<nd_item<Dims>, LambdaArgType>::value, nd_item<Dims>,
922-
typename std::conditional<
923-
std::is_convertible<item<Dims>, LambdaArgType>::value, item<Dims>,
924-
LambdaArgType>::type>::type;
920+
using type = typename std::conditional_t<
921+
detail::is_same_v<id<Dims>, LambdaArgType>, LambdaArgType,
922+
typename std::conditional_t<
923+
detail::is_convertible_v<nd_item<Dims>, LambdaArgType>,
924+
nd_item<Dims>,
925+
typename std::conditional_t<
926+
detail::is_convertible_v<item<Dims>, LambdaArgType>, item<Dims>,
927+
LambdaArgType>>>;
925928
};
926929

927930
/// Defines and invokes a SYCL kernel function for the specified range.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clangxx -fsycl -fsycl-device-only -D__SYCL_INTERNAL_API -O0 -c -emit-llvm -S -o - %s | FileCheck %s
2+
3+
// This test performs a basic type check that sycl::id is used in the resulting kernel type.
4+
5+
#include <CL/sycl.hpp>
6+
#include <iostream>
7+
8+
int main() {
9+
sycl::queue q;
10+
11+
// Initialize data array
12+
const int sz = 16;
13+
int data[sz] = {0};
14+
for (int i = 0; i < sz; ++i) {
15+
data[i] = i;
16+
}
17+
18+
// Check a kernel whose argument type is sycl::id
19+
sycl::buffer<int> data_buf(data, sz);
20+
q.submit([&](sycl::handler &h) {
21+
auto buf_acc = data_buf.get_access<sycl::access::mode::read_write>(h);
22+
h.parallel_for(
23+
sycl::range<1>{sz},
24+
// CHECK: cl{{.*}}sycl{{.*}}detail{{.*}}RoundedRangeKernel{{.*}}id{{.*}}main{{.*}}handler
25+
[=](sycl::id<1> item) { buf_acc[item] += 1; });
26+
});
27+
q.wait();
28+
29+
return 0;
30+
}

0 commit comments

Comments
 (0)