Skip to content

Commit 2e7c080

Browse files
committed
Remove the first version of API and update feature macro value
1 parent b6aceb7 commit 2e7c080

File tree

3 files changed

+3
-230
lines changed

3 files changed

+3
-230
lines changed

sycl/include/sycl/ext/oneapi/experimental/group_helpers_sorters.hpp

Lines changed: 0 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -97,159 +97,8 @@ template <typename Group, size_t Extent> class group_with_scratchpad {
9797
sycl::span<std::byte, Extent> get_memory() const { return scratch; }
9898
};
9999

100-
// Default sorter provided by the first version of the extension specification.
101-
template <typename Compare = std::less<>> class default_sorter {
102-
Compare comp;
103-
sycl::span<std::byte> scratch;
104-
105-
public:
106-
template <size_t Extent>
107-
default_sorter(sycl::span<std::byte, Extent> scratch_,
108-
Compare comp_ = Compare())
109-
: comp(comp_), scratch(scratch_) {}
110-
111-
template <typename Group, typename Ptr>
112-
void operator()([[maybe_unused]] Group g, [[maybe_unused]] Ptr first,
113-
[[maybe_unused]] Ptr last) {
114-
#ifdef __SYCL_DEVICE_ONLY__
115-
using T = typename sycl::detail::GetValueType<Ptr>::type;
116-
size_t n = std::distance(first, last);
117-
T *scratch_begin = sycl::detail::align_scratch<T>(scratch, g, n);
118-
sycl::detail::merge_sort(g, first, n, comp, scratch_begin);
119-
#else
120-
throw sycl::exception(
121-
std::error_code(PI_ERROR_INVALID_DEVICE, sycl::sycl_category()),
122-
"default_sorter constructor is not supported on host device.");
123-
#endif
124-
}
125-
126-
template <typename Group, typename T>
127-
T operator()([[maybe_unused]] Group g, T val) {
128-
#ifdef __SYCL_DEVICE_ONLY__
129-
std::size_t local_id = g.get_local_linear_id();
130-
auto range_size = g.get_local_range().size();
131-
T *scratch_begin = sycl::detail::align_scratch<T>(
132-
scratch, g, /* output storage and temporary storage */ 2 * range_size);
133-
scratch_begin[local_id] = val;
134-
sycl::detail::merge_sort(g, scratch_begin, range_size, comp,
135-
scratch_begin + range_size);
136-
val = scratch_begin[local_id];
137-
#else
138-
throw sycl::exception(
139-
std::error_code(PI_ERROR_INVALID_DEVICE, sycl::sycl_category()),
140-
"default_sorter operator() is not supported on host device.");
141-
#endif
142-
return val;
143-
}
144-
145-
template <typename T>
146-
static constexpr size_t memory_required(sycl::memory_scope,
147-
size_t range_size) {
148-
return range_size * sizeof(T) + alignof(T);
149-
}
150-
151-
template <typename T, int dim = 1>
152-
static constexpr size_t memory_required(sycl::memory_scope scope,
153-
sycl::range<dim> r) {
154-
return 2 * memory_required<T>(scope, r.size());
155-
}
156-
};
157-
158100
enum class sorting_order { ascending, descending };
159101

160-
namespace detail {
161-
162-
template <typename T, sorting_order = sorting_order::ascending>
163-
struct ConvertToComp {
164-
using Type = std::less<T>;
165-
};
166-
167-
template <typename T> struct ConvertToComp<T, sorting_order::descending> {
168-
using Type = std::greater<T>;
169-
};
170-
} // namespace detail
171-
172-
// Radix sorter provided by the first version of the extension specification.
173-
template <typename ValT, sorting_order OrderT = sorting_order::ascending,
174-
unsigned int BitsPerPass = 4>
175-
class radix_sorter {
176-
177-
sycl::span<std::byte> scratch;
178-
uint32_t first_bit = 0;
179-
uint32_t last_bit = 0;
180-
181-
static constexpr uint32_t bits = BitsPerPass;
182-
using bitset_t = std::bitset<sizeof(ValT) * CHAR_BIT>;
183-
184-
public:
185-
template <size_t Extent>
186-
radix_sorter(sycl::span<std::byte, Extent> scratch_,
187-
const bitset_t mask = bitset_t{}.set())
188-
: scratch(scratch_) {
189-
static_assert((std::is_arithmetic<ValT>::value ||
190-
std::is_same<ValT, sycl::half>::value ||
191-
std::is_same<ValT, sycl::ext::oneapi::bfloat16>::value),
192-
"radix sort is not usable");
193-
194-
for (first_bit = 0; first_bit < mask.size() && !mask[first_bit];
195-
++first_bit)
196-
;
197-
for (last_bit = first_bit; last_bit < mask.size() && mask[last_bit];
198-
++last_bit)
199-
;
200-
}
201-
202-
template <typename GroupT, typename PtrT>
203-
void operator()([[maybe_unused]] GroupT g, [[maybe_unused]] PtrT first,
204-
[[maybe_unused]] PtrT last) {
205-
#ifdef __SYCL_DEVICE_ONLY__
206-
sycl::detail::privateDynamicSort</*is_key_value=*/false,
207-
OrderT == sorting_order::ascending,
208-
/*empty*/ 1, BitsPerPass>(
209-
g, first, /*empty*/ first, std::distance(first, last), scratch.data(),
210-
first_bit, last_bit);
211-
#else
212-
throw sycl::exception(
213-
std::error_code(PI_ERROR_INVALID_DEVICE, sycl::sycl_category()),
214-
"radix_sorter is not supported on host device.");
215-
#endif
216-
}
217-
218-
template <typename GroupT>
219-
ValT operator()([[maybe_unused]] GroupT g, [[maybe_unused]] ValT val) {
220-
#ifdef __SYCL_DEVICE_ONLY__
221-
ValT result[]{val};
222-
sycl::detail::privateStaticSort</*is_key_value=*/false,
223-
/*is_input_blocked=*/true,
224-
/*is_output_blocked=*/true,
225-
OrderT == sorting_order::ascending,
226-
/*items_per_work_item=*/1, bits>(
227-
g, result, /*empty*/ result, scratch.data(), first_bit, last_bit);
228-
return result[0];
229-
#else
230-
throw sycl::exception(
231-
std::error_code(PI_ERROR_INVALID_DEVICE, sycl::sycl_category()),
232-
"radix_sorter is not supported on host device.");
233-
#endif
234-
}
235-
236-
static constexpr size_t memory_required(sycl::memory_scope,
237-
size_t range_size) {
238-
return range_size * sizeof(ValT) +
239-
(1 << bits) * range_size * sizeof(uint32_t) + alignof(uint32_t);
240-
}
241-
242-
// memory_helpers
243-
template <int dimensions = 1>
244-
static constexpr size_t memory_required(sycl::memory_scope,
245-
sycl::range<dimensions> local_range) {
246-
return (std::max)(local_range.size() * sizeof(ValT),
247-
local_range.size() * (1 << bits) * sizeof(uint32_t));
248-
}
249-
};
250-
251-
// Default sorters provided by the second version of the extension
252-
// specification.
253102
namespace default_sorters {
254103

255104
template <typename CompareT = std::less<>> class joint_sorter {
@@ -458,7 +307,6 @@ class group_key_value_sorter {
458307
};
459308
} // namespace default_sorters
460309

461-
// Radix sorters provided by the second version of the extension specification.
462310
namespace radix_sorters {
463311

464312
template <typename ValT, sorting_order OrderT = sorting_order::ascending,

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ inline namespace _V1 {
4242
#define SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES 1
4343
#define SYCL_EXT_ONEAPI_BINDLESS_IMAGES 1
4444
#define SYCL_EXT_ONEAPI_GROUP_ALGORITHMS 1
45-
#define SYCL_EXT_ONEAPI_GROUP_SORT 1
45+
#define SYCL_EXT_ONEAPI_GROUP_SORT 2
4646
#define SYCL_EXT_ONEAPI_KERNEL_COMPILER 1
4747
#define SYCL_EXT_ONEAPI_KERNEL_COMPILER_OPENCL 1
4848
#define SYCL_EXT_ONEAPI_KERNEL_COMPILER_SPIRV 1

sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp

Lines changed: 2 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// REQUIRES: sg-8
2-
// RUN: %{build} -fsycl-device-code-split=per_kernel -DVERSION=1 -o %t1.out
3-
// RUN: %{run} %t1.out
4-
// RUN: %{build} -fsycl-device-code-split=per_kernel -DVERSION=2 -o %t2.out
5-
// RUN: %{run} %t2.out
2+
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
3+
// RUN: %{run} %t.out
64
// UNSUPPORTED: accelerator
75

86
// The test verifies sort API extension.
@@ -40,31 +38,6 @@
4038
#include <random>
4139
#include <vector>
4240

43-
#if VERSION == 1
44-
template <class CompT, class T> struct RadixSorterType;
45-
46-
template <class T> struct RadixSorterType<std::greater<T>, T> {
47-
using Type =
48-
oneapi_exp::radix_sorter<T, oneapi_exp::sorting_order::descending>;
49-
};
50-
51-
template <class T> struct RadixSorterType<std::less<T>, T> {
52-
using Type =
53-
oneapi_exp::radix_sorter<T, oneapi_exp::sorting_order::ascending>;
54-
};
55-
56-
// Dummy overloads for CustomType which is not supported by radix sorter
57-
template <> struct RadixSorterType<std::less<CustomType>, CustomType> {
58-
using Type =
59-
oneapi_exp::radix_sorter<int, oneapi_exp::sorting_order::ascending>;
60-
};
61-
62-
template <> struct RadixSorterType<std::greater<CustomType>, CustomType> {
63-
using Type =
64-
oneapi_exp::radix_sorter<int, oneapi_exp::sorting_order::descending>;
65-
};
66-
#endif
67-
6841
template <UseGroupT UseGroup, int Dims, class T, class Compare>
6942
void RunJointSort(sycl::queue &Q, const std::vector<T> &DataToSort,
7043
const Compare &Comp) {
@@ -76,40 +49,24 @@ void RunJointSort(sycl::queue &Q, const std::vector<T> &DataToSort,
7649

7750
constexpr size_t NumSubGroups = WGSize / ReqSubGroupSize;
7851

79-
#if VERSION == 1
80-
using RadixSorterT = typename RadixSorterType<Compare, T>::Type;
81-
#else
8252
using RadixSorterT = oneapi_exp::radix_sorters::joint_sorter<
8353
typename ConvertToSimpleType<T>::Type,
8454
ConvertToSortingOrder<Compare>::Type>;
85-
#endif
8655

8756
std::size_t LocalMemorySizeDefault = 0;
8857
std::size_t LocalMemorySizeRadix = 0;
8958
if (UseGroup == UseGroupT::SubGroup) {
9059
// Each sub-group needs a piece of memory for sorting
91-
#if VERSION == 1
92-
LocalMemorySizeDefault =
93-
oneapi_exp::default_sorter<Compare>::template memory_required<T>(
94-
sycl::memory_scope::sub_group, ReqSubGroupSize * ElemsPerWI);
95-
#else
9660
LocalMemorySizeDefault = oneapi_exp::default_sorters::joint_sorter<
9761
Compare>::template memory_required<T>(sycl::memory_scope::sub_group,
9862
ReqSubGroupSize * ElemsPerWI);
99-
#endif
10063
LocalMemorySizeRadix = RadixSorterT::memory_required(
10164
sycl::memory_scope::sub_group, ReqSubGroupSize * ElemsPerWI);
10265
} else {
10366
// A single chunk of memory for each work-group
104-
#if VERSION == 1
105-
LocalMemorySizeDefault =
106-
oneapi_exp::default_sorter<Compare>::template memory_required<T>(
107-
sycl::memory_scope::work_group, WGSize * ElemsPerWI);
108-
#else
10967
LocalMemorySizeDefault = oneapi_exp::default_sorters::joint_sorter<
11068
Compare>::template memory_required<T>(sycl::memory_scope::work_group,
11169
WGSize * ElemsPerWI);
112-
#endif
11370
LocalMemorySizeRadix = RadixSorterT::memory_required(
11471
sycl::memory_scope::sub_group, WGSize * ElemsPerWI);
11572
}
@@ -203,13 +160,8 @@ void RunJointSort(sycl::queue &Q, const std::vector<T> &DataToSort,
203160

204161
oneapi_exp::joint_sort(
205162
Group, &AccToSort2[StartIdx], &AccToSort2[EndIdx],
206-
#if VERSION == 1
207-
oneapi_exp::default_sorter<Compare>(sycl::span{
208-
&ScratchDefault[LocalPartID], LocalMemorySizeDefault}));
209-
#else
210163
oneapi_exp::default_sorters::joint_sorter<Compare>(sycl::span{
211164
&ScratchDefault[LocalPartID], LocalMemorySizeDefault}));
212-
#endif
213165

214166
const size_t LocalPartIDRadix =
215167
UseGroup == UseGroupT::SubGroup
@@ -280,42 +232,20 @@ void RunSortOVerGroup(sycl::queue &Q, const std::vector<T> &DataToSort,
280232
std::size_t LocalMemorySizeRadix = 0;
281233
if (UseGroup == UseGroupT::SubGroup) {
282234
// Each sub-group needs a piece of memory for sorting
283-
#if VERSION == 1
284-
LocalMemorySizeDefault =
285-
oneapi_exp::default_sorter<Compare>::template memory_required<T>(
286-
sycl::memory_scope::sub_group, sycl::range<1>{ReqSubGroupSize});
287-
#else
288235
LocalMemorySizeDefault = oneapi_exp::default_sorters::group_sorter<
289236
T, Compare, 1>::memory_required(sycl::memory_scope::sub_group,
290237
ReqSubGroupSize);
291-
#endif
292238

293-
#if VERSION == 1
294-
LocalMemorySizeRadix = RadixSorterT::memory_required(
295-
sycl::memory_scope::sub_group, sycl::range<1>{ReqSubGroupSize});
296-
#else
297239
LocalMemorySizeRadix = RadixSorterT::memory_required(
298240
sycl::memory_scope::sub_group, ReqSubGroupSize);
299-
#endif
300241
} else {
301242
// A single chunk of memory for each work-group
302-
#if VERSION == 1
303-
LocalMemorySizeDefault =
304-
oneapi_exp::default_sorter<Compare>::template memory_required<T>(
305-
sycl::memory_scope::work_group, sycl::range<1>{NumOfElements});
306-
#else
307243
LocalMemorySizeDefault = oneapi_exp::default_sorters::group_sorter<
308244
T, Compare, 1>::memory_required(sycl::memory_scope::work_group,
309245
NumOfElements);
310-
#endif
311246

312-
#if VERSION == 1
313-
LocalMemorySizeRadix = RadixSorterT::memory_required(
314-
sycl::memory_scope::work_group, sycl::range<1>{NumOfElements});
315-
#else
316247
LocalMemorySizeRadix = RadixSorterT::memory_required(
317248
sycl::memory_scope::work_group, NumOfElements);
318-
#endif
319249
}
320250

321251
std::vector<T> DataToSortCase0 = DataToSort;
@@ -388,13 +318,8 @@ void RunSortOVerGroup(sycl::queue &Q, const std::vector<T> &DataToSort,
388318

389319
AccToSort2[GlobalLinearID] = oneapi_exp::sort_over_group(
390320
Group, AccToSort2[GlobalLinearID],
391-
#if VERSION == 1
392-
oneapi_exp::default_sorter<Compare>(
393-
sycl::span{ScratchPtrDefault, LocalMemorySizeDefault}));
394-
#else
395321
oneapi_exp::default_sorters::group_sorter<T, Compare, 1>(
396322
sycl::span{ScratchPtrDefault, LocalMemorySizeDefault}));
397-
#endif
398323

399324
// Each sub-group should use it's own part of the scratch pad
400325
const size_t ScratchShiftRadix =

0 commit comments

Comments
 (0)