10
10
:encoding: utf-8
11
11
:lang: en
12
12
:dpcpp: pass:[DPC++]
13
+ :endnote: —{nbsp}end{nbsp}note
13
14
14
15
// Set the default source code type in this document to C++,
15
16
// for syntax highlighting purposes. This is needed because
@@ -106,61 +107,74 @@ If the `sycl::nd_range` parameter used to launch a kernel is incompatible with
106
107
the results of a kernel's launch queries, an implementation must throw a
107
108
synchronous exception with the `errc::nd_range` error code.
108
109
109
- [NOTE]
110
- ====
111
- The values returned by `ext_oneapi_get_info` account for all properties
112
- attached to a kernel (via the mechanisms defined in the
110
+ [_Note_: The values returned by `ext_oneapi_get_info` account for all
111
+ properties attached to a kernel (via the mechanisms defined in the
113
112
sycl_ext_oneapi_kernel_properties extension), as well as the usage of features
114
- like group algorithms and work-group local memory. Developers should assume
115
- that the values will differ across kernels.
116
- ====
113
+ like group algorithms and work-group local memory.
114
+ Developers should assume that the values will differ across
115
+ kernels._{endnote}_]
117
116
118
117
[source,c++]
119
118
----
120
119
namespace sycl {
121
120
122
121
class kernel {
123
122
public:
124
- template <typename Param, typename... T>
125
- /*return-type*/ ext_oneapi_get_info(T... args) const;
126
- };
127
123
128
- }
129
- ----
124
+ // Only available if Param is max_work_item_sizes<1>
125
+ template <typename Param>
126
+ id<1> ext_oneapi_get_info(sycl::queue q) const;
130
127
131
- [source,c++]
132
- ----
133
- template <typename Param, typename... T>
134
- /*return-type*/ ext_oneapi_get_info(T... args) const;
135
- ----
136
- _Constraints_: Available only when the types `+T...+` described by the parameter
137
- pack match the types defined in the table below.
128
+ // Only available if Param is max_work_item_sizes<2>
129
+ template <typename Param>
130
+ id<2> ext_oneapi_get_info(sycl::queue q) const;
138
131
139
- _Preconditions_: `Param` must be one of the `info::kernel` descriptors defined
140
- in this extension.
132
+ // Only available if Param is max_work_item_sizes<3>
133
+ template <typename Param>
134
+ id<3> ext_oneapi_get_info(sycl::queue q) const;
141
135
142
- _Returns_: Information about the kernel that applies when the kernel is
143
- submitted with the configuration described by the parameter pack `+T...+`.
144
- The return type is defined in the table below.
136
+ // Only available if Param is max_work_group_size
137
+ template <typename Param>
138
+ size_t ext_oneapi_get_info(sycl::queue q) const;
145
139
146
- This extension adds several new queries to this interface, many of which
147
- already have equivalents in the `kernel_device_specific` or `device`
148
- namespaces.
140
+ // Only available if Param is max_num_work_groups
141
+ template <typename Param>
142
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r, size_t bytes = 0) const;
149
143
150
- NOTE: These queries are queue- and not device-specific because it is
151
- anticipated that implementations will introduce finer-grained queue
152
- controls that impact the scheduling of kernels.
144
+ // Only available if Param is max_num_work_groups
145
+ template <typename Param>
146
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r, size_t bytes = 0) const;
153
147
154
- NOTE: Allowing devices to return a value of 1 for these queries maximizes the
155
- chances that code written to use certain extension remains portable. However,
156
- the performance of kernels using only one work-group, sub-group or work-item
157
- may be limited on some (highly parallel) devices. If certain properties (e.g.
158
- forward progress guarantees, cross-work-group synchronization) are being used
159
- as part of a performance optimization, developers should check that the values
160
- returned by these queries is not 1.
148
+ // Only available if Param is max_num_work_groups
149
+ template <typename Param>
150
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r, size_t bytes = 0) const;
151
+
152
+ // Only available if Param is max_sub_group_size
153
+ template <typename Param>
154
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
155
+
156
+ // Only available if Param is max_sub_group_size
157
+ template <typename Param>
158
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
159
+
160
+ // Only available if Param is max_sub_group_size
161
+ template <typename Param>
162
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
163
+
164
+ // Only available if Param is num_sub_groups
165
+ template <typename Param>
166
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
167
+
168
+ // Only available if Param is num_sub_groups
169
+ template <typename Param>
170
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
171
+
172
+ // Only available if Param is num_sub_groups
173
+ template <typename Param>
174
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
175
+
176
+ };
161
177
162
- [source, c++]
163
- ----
164
178
namespace ext::oneapi::experimental::info::kernel {
165
179
166
180
template <uint32_t Dimensions>
@@ -169,91 +183,156 @@ struct max_work_item_sizes;
169
183
struct max_work_group_size;
170
184
struct max_num_work_groups;
171
185
172
- }
186
+ struct max_sub_group_size;
187
+ struct num_sub_groups;
188
+
189
+ } // namespace ext::oneapi::experimental::info::kernel
190
+
191
+ } // namespace sycl
173
192
----
174
193
175
- [%header,cols="1,5,5,5"]
176
- |===
177
- |Kernel Descriptor
178
- |Argument Types
179
- |Return Type
180
- |Description
194
+ ==== Querying valid launch configurations
195
+
196
+ This extension adds several new queries for reasoning about the set of valid
197
+ launch configurations for a given kernel, many of which already have
198
+ equivalents in the `kernel_device_specific` or `device` namespaces.
199
+
200
+ [_Note_: These queries are queue- and not device-specific because it is
201
+ anticipated that implementations will introduce finer-grained queue
202
+ controls that impact the scheduling of kernels._{endnote}_]
203
+
204
+ [_Note_: Allowing devices to return a value of 1 for these queries maximizes
205
+ the chances that code written to use certain extension remains portable.
206
+ However, the performance of kernels using only one work-group, sub-group or
207
+ work-item may be limited on some (highly parallel) devices.
208
+ If certain properties (e.g. forward progress guarantees, cross-work-group
209
+ synchronization) are being used as part of a performance optimization,
210
+ developers should check that the values returned by these queries is not
211
+ 1._{endnote}_]
212
+
213
+ '''
214
+
215
+ [source,c++]
216
+ ----
217
+ template <typename Param>
218
+ id<1> ext_oneapi_get_info(sycl::queue q) const; // (1)
219
+
220
+ template <typename Param>
221
+ id<2> ext_oneapi_get_info(sycl::queue q) const; // (2)
222
+
223
+ template <typename Param>
224
+ id<3> ext_oneapi_get_info(sycl::queue q) const; // (3)
225
+ ----
226
+ _Constraints (1)_: `Param` is `max_work_item_sizes<1>`.
227
+
228
+ _Constraints (2)_: `Param` is `max_work_item_sizes<2>`.
229
+
230
+ _Constraints (3)_: `Param` is `max_work_item_sizes<3>`.
231
+
232
+ _Returns_: The maximum number of work-items that are permitted in each
233
+ dimension of a work-group, when the kernel is submitted to the specified queue,
234
+ accounting for any kernel properties or features.
235
+ If the kernel can be submitted to the specified queue without an error, the
236
+ minimum value returned by this query is 1, otherwise it is 0.
181
237
182
- |`template <uint32_t Dimensions>
183
- max_work_item_sizes`
184
- |`sycl::queue`
185
- |`id<Dimensions>`
186
- |Returns the maximum number of work-items that are permitted in each dimension
187
- of a work-group, when the kernel is submitted to the specified queue,
188
- accounting for any kernel properties or features. If the kernel can be
189
- submitted to the specified queue without an error, the minimum value returned
190
- by this query is 1, otherwise it is 0.
191
-
192
- |`max_work_group_size`
193
- |`sycl::queue`
194
- |`size_t`
195
- |Returns the maximum number of work-items that are permitted in a work-group,
238
+ '''
239
+
240
+ [source,c++]
241
+ ----
242
+ template <typename Param>
243
+ size_t ext_oneapi_get_info(sycl::queue q) const;
244
+ ----
245
+ _Constraints_: `Param` is `max_work_group_size`.
246
+
247
+ _Returns_: The maximum number of work-items that are permitted in a work-group,
196
248
when the kernel is submitted to the specified queue, accounting for any
197
- kernel properties or features. If the kernel can be submitted to the specified
198
- queue without an error, the minimum value returned by this query is 1,
199
- otherwise it is 0.
200
-
201
- |`max_num_work_groups`
202
- |`sycl::queue`, `sycl::range`, `size_t`
203
- |`size_t`
204
- |Returns the maximum number of work-groups, when the kernel is submitted to the
205
- specified queue with the specified work-group size and the specified amount of
206
- dynamic work-group local memory (in bytes), accounting for any kernel
207
- properties or features. If the specified work-group size is 0, which is
208
- invalid, then the implementation will throw a synchronous exception with the
209
- `errc::invalid` error code. If the kernel can be submitted to the specified
210
- queue without an error, the minimum value returned by this query is 1,
211
- otherwise it is 0.
249
+ kernel properties or features.
250
+ If the kernel can be submitted to the specified queue without an error, the
251
+ minimum value returned by this query is 1, otherwise it is 0.
212
252
213
- |===
253
+ '''
214
254
215
- A separate set of launch queries can be used to reason about how an
216
- implementation will launch a kernel on the specified queue. The values of these
217
- queries should also be checked if a kernel is expected to be launched in a
218
- specific way (e.g., if the kernel requires two sub-groups for correctness).
255
+ [source,c++]
256
+ ----
257
+ template <typename Param>
258
+ size_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r, size_t bytes = 0) const;
219
259
220
- [source, c++]
260
+ template <typename Param>
261
+ size_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r, size_t bytes = 0) const;
262
+
263
+ template <typename Param>
264
+ size_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r, size_t bytes = 0) const;
221
265
----
222
- namespace ext::oneapi::experimental::info::kernel {
266
+ _Constraints_: `Param` is `max_num_work_groups`.
223
267
224
- struct max_sub_group_size;
225
- struct num_sub_groups;
268
+ _Returns_: The maximum number of work-groups, when the kernel is submitted to
269
+ the specified queue with the specified work-group size and the specified amount
270
+ of dynamic work-group local memory (in bytes), accounting for any kernel
271
+ properties or features.
272
+ If the kernel can be submitted to the specified queue without an
273
+ error, the minimum value returned by this query is 1, otherwise it is 0.
274
+
275
+ _Throws_: A synchronous `exception` with the error code `errc::invalid` if the
276
+ work-group size `r` is 0.
277
+
278
+
279
+ ==== Querying launch behavior
280
+
281
+ A separate set of launch queries can be used to reason about how an
282
+ implementation will launch a kernel on the specified queue.
283
+ The values of these queries should also be checked if a kernel is expected to
284
+ be launched in a specific way (e.g., if the kernel requires two sub-groups for
285
+ correctness).
286
+
287
+ '''
226
288
227
- }
289
+ [source,c++]
228
290
----
291
+ template <typename Param>
292
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
229
293
230
- [%header,cols="1,5,5,5"]
231
- |===
232
- |Kernel Descriptor
233
- |Argument Types
234
- |Return Type
235
- |Description
294
+ template <typename Param>
295
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
236
296
237
- |`max_sub_group_size`
238
- |`sycl::queue`, `sycl::range`
239
- |`uint32_t`
240
- |Returns the maximum sub-group size, when the kernel is submitted to the
297
+ template <typename Param>
298
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
299
+ ----
300
+ _Constraints_: `Param` is `max_sub_group_size`.
301
+
302
+ _Returns_: The maximum sub-group size, when the kernel is submitted to the
241
303
specified queue with the specified work-group size, accounting for any kernel
242
- properties or features. The return value of this query must match the value
243
- returned by `sub_group::get_max_local_range()` inside the kernel. If the kernel
244
- can be submitted to the specified queue without an error, the minimum value
245
- returned by this query is 1, otherwise it is 0.
246
-
247
- |`num_sub_groups`
248
- |`sycl::queue`, `sycl::range`
249
- |`uint32_t`
250
- |Returns the number of sub-groups per work-group, when the kernel is submitted
251
- to the specified queue with the specified work-group size, accounting for any
252
- kernel properties or features. If the kernel can be submitted to the specified
253
- queue without an error, the minimum value returned by this query is 1,
254
- otherwise it is 0.
304
+ properties or features.
305
+ The return value of this query must match the value returned by
306
+ `sub_group::get_max_local_range()` inside the kernel.
307
+ If the kernel can be submitted to the specified queue without an error, the
308
+ minimum value returned by this query is 1, otherwise it is 0.
255
309
256
- |===
310
+ _Throws_: A synchronous `exception` with the error code `errc::invalid` if the
311
+ work-group size `r` is 0.
312
+
313
+ '''
314
+
315
+ [source,c++]
316
+ ----
317
+ template <typename Param>
318
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
319
+
320
+ template <typename Param>
321
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
322
+
323
+ template <typename Param>
324
+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
325
+ ----
326
+ _Constraints_: `Param` is `num_sub_groups`.
327
+
328
+ _Returns_: The number of sub-groups per work-group, when the kernel is
329
+ submitted to the specified queue with the specified work-group size, accounting
330
+ for any kernel properties or features.
331
+ If the kernel can be submitted to the specified queue without an error, the
332
+ minimum value returned by this query is 1, otherwise it is 0.
333
+
334
+ _Throws_: A synchronous `exception` with the error code `errc::invalid` if the
335
+ work-group size `r` is 0.
257
336
258
337
== Issues
259
338
0 commit comments