@@ -514,8 +514,8 @@ namespace sycl::ext::oneapi::experimental {
514
514
void operator()(Group g, Ptr first, Ptr last); // (2)
515
515
516
516
template<typename T>
517
- static constexpr size_t
518
- memory_required(sycl::memory_scope scope, std::size_t range_size); // (3)
517
+ static size_t
518
+ memory_required(sycl::device d, sycl::memory_scope scope, std::size_t range_size); // (3)
519
519
};
520
520
521
521
template<typename T,
@@ -534,8 +534,8 @@ namespace sycl::ext::oneapi::experimental {
534
534
sycl::span<T, ElementsPerWorkItem> values,
535
535
Properties properties); // (6)
536
536
537
- static constexpr size_t
538
- memory_required(sycl::memory_scope scope, std::size_t range_size); // (7)
537
+ static size_t
538
+ memory_required(sycl::device d, sycl::memory_scope scope, std::size_t range_size); // (7)
539
539
};
540
540
541
541
template<typename T,
@@ -557,8 +557,8 @@ namespace sycl::ext::oneapi::experimental {
557
557
sycl::span<U, ElementsPerWorkItem> values,
558
558
Properties property); // (10)
559
559
560
- static constexpr std::size_t
561
- memory_required(sycl::memory_scope scope, std::size_t range_size); // (11)
560
+ static std::size_t
561
+ memory_required(sycl::device d, sycl::memory_scope scope, std::size_t range_size); // (11)
562
562
};
563
563
}
564
564
@@ -671,9 +671,11 @@ the `joint_sort` algorithm.
671
671
_Complexity_: Let `N` be `last - first`. `O(N*log(N)*log(N))` comparisons.
672
672
673
673
(3) Returns the size of temporary memory (in bytes) that is required by
674
- the default sorting algorithm defined by the sorter calling by `joint_sort`.
674
+ the default sorting algorithm defined by the sorter called by `joint_sort`
675
+ for the device `d`.
675
676
`range_size` represents a range size for sorting,
676
677
e.g. `last-first` from `operator()` arguments.
678
+ This function must not be called within a SYCL kernel; it can only be called on the host.
677
679
Result depends on the `scope` parameter:
678
680
use `sycl::memory_scope::work_group` to get memory size required
679
681
for each work-group;
@@ -694,9 +696,11 @@ _Complexity_: Let `N` be the `Group` size multiplied by `ElementsPerWorkItem`.
694
696
`O(N*log(N)*log(N))` comparisons.
695
697
696
698
(7) Returns the size of temporary memory (in bytes) that is required by the default
697
- sorting algorithm defined by the sorter calling by `sort_over_group`.
699
+ sorting algorithm defined by the sorter called by `sort_over_group`
700
+ for the device `d`.
698
701
`ElementsPerWorkItem` is the extent parameter for `sycl::span`
699
702
that is an input parameter for `sort_over_group`.
703
+ This function must not be called within a SYCL kernel; it can only be called on the host.
700
704
If `scope == sycl::memory_scope::work_group`,
701
705
`range_size` is the size of the local range for `sycl::nd_range`
702
706
that was used to run the kernel;
@@ -719,7 +723,9 @@ _Complexity_: Let `N` be the `Group` size multiplied by `ElementsPerWorkItem`.
719
723
720
724
(11) Returns the size of temporary memory (in bytes) that is required by
721
725
the default key-value
722
- sorting algorithm defined by the sorter calling by `sort_key_value_over_group`.
726
+ sorting algorithm defined by the sorter called by `sort_key_value_over_group`
727
+ for the device `d`.
728
+ This function must not be called within a SYCL kernel; it can only be called on the host.
723
729
If `scope == sycl::memory_scope::work_group`,
724
730
`range_size` is the size of the local range for `sycl::nd_range`
725
731
that was used to run the kernel;
@@ -998,7 +1004,7 @@ namespace my_sycl = sycl::ext::oneapi::experimental;
998
1004
// calculate required local memory size
999
1005
size_t temp_memory_size =
1000
1006
my_sycl::default_sorters::joint_sorter<>::memory_required<T>(
1001
- sycl::memory_scope::work_group, n);
1007
+ d, sycl::memory_scope::work_group, n);
1002
1008
1003
1009
q.submit([&](sycl::handler& h) {
1004
1010
auto acc = sycl::accessor(buf, h);
@@ -1075,7 +1081,7 @@ using TupleType =
1075
1081
// calculate required local memory size
1076
1082
size_t temp_memory_size =
1077
1083
my_sycl::default_sorters::joint_sorter<>::memory_required<TupleType>(
1078
- sycl::memory_scope::work_group, n);
1084
+ d, sycl::memory_scope::work_group, n);
1079
1085
1080
1086
q.submit([&](sycl::handler& h) {
1081
1087
auto keys_acc = sycl::accessor(keys_buf, h);
@@ -1185,8 +1191,6 @@ because it's easy to pass different comparator types.
1185
1191
. Think about reducing overloads for sorting functions. The thing is that
1186
1192
overloads with `Compare` objects seem extra and overloads with sorters,
1187
1193
without sorters are enough.
1188
- . It would be better if `memory_required` methods had a `sycl::device` parameter
1189
- because different devices can require different amount of memory.
1190
1194
1191
1195
== Non-implemented features
1192
1196
Please note that the following is not implemented yet for the open-source repo:
@@ -1206,4 +1210,5 @@ Please, note that following is not inplemented yet for the open-source repo:
1206
1210
|3|2021-12-16|Andrey Fedorov|Some refactoring, sections reordering,
1207
1211
making the entire extension experimental
1208
1212
|4|2022-11-14|Andrey Fedorov|Fixed size arrays, key-value sorting and properties
1213
+ |5|2023-11-09|Andrey Fedorov|Changed `memory_required` functions for default sorters
1209
1214
|========================================
0 commit comments