Skip to content

Commit d35a815

Browse files
Pennycook authored and bader committed
[SYCL] Define collective functors using std (#840)
intel::plus should be an alias to std::plus. intel::minimum/maximum should use std::less/std::greater. Align collective functors with C++14. void specializations should be transparent, accepting arbitrary types and using perfect forwarding. Guard some sub-group tests with __cplusplus version check. Signed-off-by: John Pennycook <[email protected]>
1 parent 85a3c73 commit d35a815

File tree

3 files changed

+59
-34
lines changed

3 files changed

+59
-34
lines changed

sycl/include/CL/sycl/intel/functional.hpp

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -7,44 +7,51 @@
77
//===----------------------------------------------------------------------===//
88

99
#pragma once
10+
#include <functional>
1011

1112
namespace cl {
1213
namespace sycl {
1314
namespace intel {
1415

1516
template <typename T = void> struct minimum {
1617
T operator()(const T &lhs, const T &rhs) const {
17-
return (lhs <= rhs) ? lhs : rhs;
18+
return std::less<T>()(lhs, rhs) ? lhs : rhs;
1819
}
1920
};
2021

22+
#if __cplusplus >= 201402L
2123
template <> struct minimum<void> {
22-
template <typename T> T operator()(const T &lhs, const T &rhs) const {
23-
return (lhs <= rhs) ? lhs : rhs;
24+
struct is_transparent {};
25+
template <typename T, typename U>
26+
auto operator()(T &&lhs, U &&rhs) const ->
27+
typename std::common_type<T &&, U &&>::type {
28+
return std::less<>()(std::forward<const T>(lhs), std::forward<const U>(rhs))
29+
? std::forward<T>(lhs)
30+
: std::forward<U>(rhs);
2431
}
2532
};
33+
#endif
2634

2735
template <typename T = void> struct maximum {
2836
T operator()(const T &lhs, const T &rhs) const {
29-
return (lhs >= rhs) ? lhs : rhs;
37+
return std::greater<T>()(lhs, rhs) ? lhs : rhs;
3038
}
3139
};
3240

41+
#if __cplusplus >= 201402L
3342
template <> struct maximum<void> {
34-
template <typename T> T operator()(const T &lhs, const T &rhs) const {
35-
return (lhs >= rhs) ? lhs : rhs;
43+
struct is_transparent {};
44+
template <typename T, typename U>
45+
auto operator()(T &&lhs, U &&rhs) const ->
46+
typename std::common_type<T &&, U &&>::type {
47+
return std::greater<>()(std::forward<const T>(lhs), std::forward<const U>(rhs))
48+
? std::forward<T>(lhs)
49+
: std::forward<U>(rhs);
3650
}
3751
};
52+
#endif
3853

39-
template <typename T = void> struct plus {
40-
T operator()(const T &lhs, const T &rhs) const { return lhs + rhs; }
41-
};
42-
43-
template <> struct plus<void> {
44-
template <typename T> T operator()(const T &lhs, const T &rhs) const {
45-
return lhs + rhs;
46-
}
47-
};
54+
template <typename T = void> using plus = std::plus<T>;
4855

4956
} // namespace intel
5057
} // namespace sycl

sycl/test/sub_group/reduce.cpp

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clangxx -fsycl %s -o %t.out
2-
// RUN: %clangxx -fsycl -D SG_GPU %s -o %t_gpu.out
1+
// RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
2+
// RUN: %clangxx -fsycl -std=c++14 -D SG_GPU %s -o %t_gpu.out
33
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
44
// RUN: %CPU_RUN_PLACEHOLDER %t.out
55
// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
@@ -73,19 +73,24 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
7373
}
7474

7575
check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
76-
check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
7776
check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
78-
check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
7977

8078
check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
81-
check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
8279
check_op<T>(Queue, T(G), intel::minimum<T>(), true, G, L);
83-
check_op<T>(Queue, T(G), intel::minimum<>(), true, G, L);
8480

8581
check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
86-
check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
8782
check_op<T>(Queue, T(0), intel::maximum<T>(), true, G, L);
83+
84+
#if __cplusplus >= 201402L
85+
check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
86+
check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
87+
88+
check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
89+
check_op<T>(Queue, T(G), intel::minimum<>(), true, G, L);
90+
91+
check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
8892
check_op<T>(Queue, T(0), intel::maximum<>(), true, G, L);
93+
#endif
8994
}
9095

9196
int main() {

sycl/test/sub_group/scan.cpp

Lines changed: 25 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clangxx -fsycl %s -o %t.out
2-
// RUN: %clangxx -fsycl -D SG_GPU %s -o %t_gpu.out
1+
// RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
2+
// RUN: %clangxx -fsycl -std=c++14 -D SG_GPU %s -o %t_gpu.out
33
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
44
// RUN: %CPU_RUN_PLACEHOLDER %t.out
55
// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
@@ -81,39 +81,52 @@ template <typename T> void check(queue &Queue, size_t G = 120, size_t L = 60) {
8181
}
8282

8383
check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
84-
check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
8584
check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
86-
check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
8785

8886
check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
89-
check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
9087
if (std::is_floating_point<T>::value ||
9188
std::is_same<T, cl::sycl::half>::value) {
9289
check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<T>(),
9390
true, G, L);
94-
check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<>(),
95-
true, G, L);
9691
} else {
9792
check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<T>(), true,
9893
G, L);
99-
check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<>(), true,
100-
G, L);
10194
}
10295

10396
check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
104-
check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
10597
if (std::is_floating_point<T>::value ||
10698
std::is_same<T, cl::sycl::half>::value) {
10799
check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<T>(),
108100
true, G, L);
109-
check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<>(),
110-
true, G, L);
111101
} else {
112102
check_op<T>(Queue, std::numeric_limits<T>::min(), intel::maximum<T>(), true,
113103
G, L);
104+
}
105+
106+
#if __cplusplus >= 201402L
107+
check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
108+
check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
109+
110+
check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
111+
if (std::is_floating_point<T>::value ||
112+
std::is_same<T, cl::sycl::half>::value) {
113+
check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<>(),
114+
true, G, L);
115+
} else {
116+
check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<>(), true,
117+
G, L);
118+
}
119+
120+
check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
121+
if (std::is_floating_point<T>::value ||
122+
std::is_same<T, cl::sycl::half>::value) {
123+
check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<>(),
124+
true, G, L);
125+
} else {
114126
check_op<T>(Queue, std::numeric_limits<T>::min(), intel::maximum<>(), true,
115127
G, L);
116128
}
129+
#endif
117130
}
118131

119132
int main() {

0 commit comments

Comments
 (0)