Skip to content

Commit b299e0f

Browse files
committed
Update the experimental latency control API to use property list
1 parent a37ca84 commit b299e0f

File tree

8 files changed

+491
-220
lines changed

8 files changed

+491
-220
lines changed

sycl/doc/extensions/supported/sycl_ext_intel_dataflow_pipes.asciidoc

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -642,55 +642,47 @@ Automated mechanisms are possible to provide uniquification across calls, and co
642642

643643
*NOTE*: The APIs described in this section are experimental. Future versions of this extension may change these APIs in ways that are incompatible with the versions described here.
644644

645-
In the experimental API version, read/write methods take template arguments, which can contain the latency control properties `latency_anchor_id` and/or `latency_constraint`.
645+
In the experimental API version, read/write methods take in a property list as function argument, which can contain the latency control properties `latency_anchor_id` and/or `latency_constraint`.
646646

647-
* `sycl::ext::intel::experimental::latency_anchor_id<N>`, where `N` is an integer: An ID to associate with the current read/write function call, which can then be referenced by other `latency_constraint` properties elsewhere in the program to define relative latency constaints. ID must be unique within the application, and a diagnostic is required if that condition is not met.
648-
* `sycl::ext::intel::experimental::latency_constraint<A, B, C>`: A tuple of three values which cause the current read/write function call to act as an endpoint of a latency constraint relative to a specified `latency_anchor_id` defined by a different instruction.
647+
* `sycl::ext::oneapi::experimental::latency_anchor_id<N>`, where `N` is an integer: An ID to associate with the current read/write function call, which can then be referenced by other `latency_constraint` properties elsewhere in the program to define relative latency constaints. ID must be unique within the application, and a diagnostic is required if that condition is not met.
648+
* `sycl::ext::oneapi::experimental::latency_constraint<A, B, C>`: A tuple of three values which cause the current read/write function call to act as an endpoint of a latency constraint relative to a specified `latency_anchor_id` defined by a different instruction.
649649
** `A` is an integer: The ID of the target anchor defined on a different instruction through a `latency_anchor_id` property.
650-
** `B` is an enum value: The type of control from the set {`type::exact`, `type::max`, `type::min`}.
650+
** `B` is an enum value: The type of control from the set {`latency_control_type::exact`, `latency_control_type::max`, `latency_control_type::min`}.
651651
** `C` is an integer: The relative clock cycle difference between the target anchor and the current function call, that the constraint should infer subject to the type of the control (exact, max, min).
652652

653-
The template arguments above don't have to be specified if user doesn't want to apply latency controls. The template arguments can be passed in arbitrary order.
654-
655653
=== Implementation
656654

657655
[source,c++]
658656
----
659-
// Added in version 2 of this extension.
660-
namespace sycl::ext::intel::experimental {
661-
enum class type {
662-
none, // default
663-
exact,
664-
max,
665-
min
666-
};
667-
668-
template <int32_t _N> struct latency_anchor_id {
669-
static constexpr int32_t value = _N;
670-
static constexpr int32_t default_value = -1;
671-
};
672-
673-
template <int32_t _N1, type _N2, int32_t _N3> struct latency_constraint {
674-
static constexpr std::tuple<int32_t, type, int32_t> value = {_N1, _N2, _N3};
675-
static constexpr std::tuple<int32_t, type, int32_t> default_value = {
676-
0, type::none, 0};
677-
};
657+
// Added in version 3 of this extension.
658+
#include <sycl/ext/oneapi/latency_control/properties.hpp>
659+
#include <sycl/ext/oneapi/properties/properties.hpp>
678660
679661
template <typename name,
680662
typename dataT,
681663
size_t min_capacity = 0>
682664
class pipe {
683665
// Blocking
684-
template <class... _Params>
666+
template <typename _propertiesT = decltype(sycl::ext::oneapi::experimental::properties{})>
685667
static dataT read();
686-
template <class... _Params>
668+
template <typename _propertiesT>
669+
static dataT read( _propertiesT Properties );
670+
671+
template <typename _propertiesT = decltype(sycl::ext::oneapi::experimental::properties{})>
687672
static void write( const dataT &data );
673+
template <typename _propertiesT>
674+
static void write( const dataT &data, _propertiesT Properties );
688675
689676
// Non-blocking
690-
template <class... _Params>
677+
template <typename _propertiesT = decltype(sycl::ext::oneapi::experimental::properties{})>
691678
static dataT read( bool &success_code );
692-
template <class... _Params>
679+
template <typename _propertiesT>
680+
static dataT read( bool &success_code, _propertiesT Properties );
681+
682+
template <typename _propertiesT = decltype(sycl::ext::oneapi::experimental::properties{})>
693683
static void write( const dataT &data, bool &success_code );
684+
template <typename _propertiesT>
685+
static void write( const dataT &data, bool &success_code, _propertiesT Properties );
694686
}
695687
} // namespace sycl::ext::intel::experimental
696688
----
@@ -699,7 +691,7 @@ class pipe {
699691

700692
[source,c++]
701693
----
702-
// Added in version 2 of this extension.
694+
// Added in version 3 of this extension.
703695
#include <sycl/ext/intel/fpga_extensions.hpp>
704696
...
705697
using Pipe1 = ext::intel::experimental::pipe<class PipeClass1, int, 8>;
@@ -709,17 +701,20 @@ using Pipe3 = ext::intel::experimental::pipe<class PipeClass2, int, 8>;
709701
myQueue.submit([&](handler &cgh) {
710702
cgh.single_task<class foo>([=] {
711703
// The following Pipe1::read is anchor 0
712-
int value = Pipe1::read<ext::intel::experimental::latency_anchor_id<0>>();
704+
int value = Pipe1::read(ext::oneapi::experimental::properties(
705+
ext::oneapi::experimental::latency_anchor_id<0>));
713706
714707
// The following Pipe2::write is anchor 1
715708
// The following Pipe2::write occurs exactly 2 cycles after anchor 0
716-
Pipe2::write<ext::intel::experimental::latency_anchor_id<1>,
717-
ext::intel::experimental::latency_constraint<
718-
0, ext::intel::experimental::type::exact, 2>>(value);
709+
Pipe2::write(value, ext::oneapi::experimental::properties(
710+
ext::oneapi::experimental::latency_anchor_id<1>,
711+
ext::oneapi::experimental::latency_constraint<
712+
0, ext::oneapi::experimental::latency_control_type::exact, 2>));
719713
720714
// The following Pipe3::write occurs at least 2 cycles after anchor 1
721-
Pipe3::write<ext::intel::experimental::latency_constraint<
722-
1, ext::intel::experimental::type::min, 2>>(value);
715+
Pipe3::write(value, ext::oneapi::experimental::properties(
716+
ext::oneapi::experimental::latency_constraint<
717+
1, ext::oneapi::experimental::latency_control_type::min, 2>));
723718
});
724719
});
725720
----
@@ -739,6 +734,7 @@ extension's APIs the implementation supports.
739734
|Value |Description
740735
|1 |Initial extension version. Base features are supported.
741736
|2 |Add experimental latency control API.
737+
|3 |Update the experimental latency control API to use property list.
742738
|===
743739

744740
== Revision History
@@ -752,6 +748,7 @@ extension's APIs the implementation supports.
752748
|2|2019-11-13|Michael Kinsner|Incorporate feedback
753749
|3|2020-04-27|Michael Kinsner|Clarify that pipe operations behave as-if they are relaxed atomic operations. Make SYCL2020 the baseline
754750
|4|2021-12-02|Shuo Niu|Add experimental latency control API
751+
|5|2022-04-07|Shuo Niu|Update the experimental latency control API to use property list
755752
|========================================
756753

757754
//************************************************************************

sycl/doc/extensions/supported/sycl_ext_intel_fpga_lsu.md

Lines changed: 92 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -126,113 +126,129 @@ this extension may change these APIs in ways that are incompatible with the
126126
versions described here.
127127

128128
In the experimental API version, member functions `load()` and `store()` take
129-
template arguments, which can contain the latency control properties
130-
`latency_anchor_id` and/or `latency_constraint`.
129+
in a property list as function argument, which can contain the latency control
130+
properties `latency_anchor_id` and/or `latency_constraint`.
131131

132-
1. **`sycl::ext::intel::experimental::latency_anchor_id<N>`, where `N` is an integer**:
132+
1. **`sycl::ext::oneapi::experimental::latency_anchor_id<N>`, where `N` is an integer**:
133133
represents ID of the current function call when it performs as an anchor. The ID
134134
must be unique within the application, with a diagnostic required if that
135135
condition is not met.
136-
2. **`sycl::ext::intel::experimental::latency_constraint<A, B, C>`** contains control
136+
2. **`sycl::ext::oneapi::experimental::latency_constraint<A, B, C>`** contains control
137137
parameters when the current function performs as a non-anchor, where:
138138
- **`A` is an integer**: The ID of the target anchor defined on a different
139139
instruction through a `latency_anchor_id` property.
140140
- **`B` is an enum value**: The type of control from the set
141-
{`type::exact`, `type::max`, `type::min`}.
141+
{`latency_control_type::exact`, `latency_control_type::max`, `latency_control_type::min`}.
142142
- **`C` is an integer**: The relative clock cycle difference between the
143143
target anchor and the current function call, that the constraint should
144144
infer subject to the type of the control (exact, max, min).
145145

146-
The template arguments above don't have to be specified if user doesn't want to
147-
apply latency controls. The template arguments can be passed in arbitrary order.
148-
149146
### Implementation
150147
```c++
151-
// Added in version 2 of this extension.
152-
namespace sycl::ext::intel::experimental {
153-
enum class type {
154-
none, // default
155-
exact,
156-
max,
157-
min
158-
};
159-
160-
template <int32_t _N> struct latency_anchor_id {
161-
static constexpr int32_t value = _N;
162-
static constexpr int32_t default_value = -1;
163-
};
164-
165-
template <int32_t _N1, type _N2, int32_t _N3> struct latency_constraint {
166-
static constexpr std::tuple<int32_t, type, int32_t> value = {_N1, _N2, _N3};
167-
static constexpr std::tuple<int32_t, type, int32_t> default_value = {
168-
0, type::none, 0};
169-
};
170-
148+
// Added in version 3 of this extension.
149+
#include <sycl/ext/oneapi/latency_control/properties.hpp>
150+
#include <sycl/ext/oneapi/properties/properties.hpp>
151+
...
171152
template <class... mem_access_params> class lsu final {
172153
public:
173154
lsu() = delete;
174155

175-
template <class... _Params, typename _T, access::address_space _space>
156+
template <typename _T, access::address_space _space,
157+
typename _propertiesT =
158+
decltype(sycl::ext::oneapi::experimental::properties{})>
176159
static _T load(sycl::multi_ptr<_T, _space> Ptr) {
177160
check_space<_space>();
178161
check_load();
179162
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
180-
static constexpr auto _anchor_id =
181-
__GetValue<int, latency_anchor_id, _Params...>::value;
182-
static constexpr auto _constraint =
183-
__GetValue3<int, type, int, latency_constraint, _Params...>::value;
184-
185-
static constexpr int _target_anchor = std::get<0>(_constraint);
186-
static constexpr type _control_type = std::get<1>(_constraint);
187-
static constexpr int _cycle = std::get<2>(_constraint);
188-
int _type;
189-
if (_control_type == type::none) {
190-
_type = 0;
191-
} else if (_control_type == type::exact) {
192-
_type = 1;
193-
} else if (_control_type == type::max) {
194-
_type = 2;
195-
} else { // _control_type == type::min
196-
_type = 3;
163+
using _latency_anchor_id_prop = typename GetOrDefaultValT<
164+
_propertiesT, sycl::ext::oneapi::experimental::latency_anchor_id_key,
165+
sycl::ext::oneapi::experimental::latency_anchor_id_key::value_t<-1>>::
166+
type;
167+
using _latency_constraint_prop = typename GetOrDefaultValT<
168+
_propertiesT, sycl::ext::oneapi::experimental::latency_constraint_key,
169+
sycl::ext::oneapi::experimental::latency_constraint_key::value_t<
170+
0, sycl::ext::oneapi::experimental::latency_control_type::none,
171+
0>>::type;
172+
static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
173+
static constexpr int32_t _target_anchor = _latency_constraint_prop::target;
174+
static constexpr sycl::ext::oneapi::experimental::latency_control_type
175+
_control_type = _latency_constraint_prop::type;
176+
static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
177+
178+
int32_t _control_type_code =
179+
0; // Default
180+
// sycl::ext::oneapi::experimental::latency_control_type::none
181+
if constexpr (_control_type == sycl::ext::oneapi::experimental::
182+
latency_control_type::exact) {
183+
_control_type_code = 1;
184+
} else if constexpr (_control_type == sycl::ext::oneapi::experimental::
185+
latency_control_type::max) {
186+
_control_type_code = 2;
187+
} else if constexpr (_control_type == sycl::ext::oneapi::experimental::
188+
latency_control_type::min) {
189+
_control_type_code = 3;
197190
}
198191

199192
return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
200-
_type, _cycle);
193+
_control_type_code, _relative_cycle);
201194
#else
202195
return *Ptr;
203196
#endif
204197
}
205198

206-
template <class... _Params, typename _T, access::address_space _space>
199+
template <typename _T, access::address_space _space, typename _propertiesT>
200+
static _T load(sycl::multi_ptr<_T, _space> Ptr, _propertiesT Properties) {
201+
return load<_T, _space, _propertiesT>(Ptr);
202+
}
203+
204+
template <typename _T, access::address_space _space,
205+
typename _propertiesT =
206+
decltype(sycl::ext::oneapi::experimental::properties{})>
207207
static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) {
208208
check_space<_space>();
209209
check_store();
210210
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
211-
static constexpr auto _anchor_id =
212-
__GetValue<int, latency_anchor_id, _Params...>::value;
213-
static constexpr auto _constraint =
214-
__GetValue3<int, type, int, latency_constraint, _Params...>::value;
215-
216-
static constexpr int _target_anchor = std::get<0>(_constraint);
217-
static constexpr type _control_type = std::get<1>(_constraint);
218-
static constexpr int _cycle = std::get<2>(_constraint);
219-
int _type;
220-
if (_control_type == type::none) {
221-
_type = 0;
222-
} else if (_control_type == type::exact) {
223-
_type = 1;
224-
} else if (_control_type == type::max) {
225-
_type = 2;
226-
} else { // _control_type == type::min
227-
_type = 3;
211+
using _latency_anchor_id_prop = typename GetOrDefaultValT<
212+
_propertiesT, sycl::ext::oneapi::experimental::latency_anchor_id_key,
213+
sycl::ext::oneapi::experimental::latency_anchor_id_key::value_t<-1>>::
214+
type;
215+
using _latency_constraint_prop = typename GetOrDefaultValT<
216+
_propertiesT, sycl::ext::oneapi::experimental::latency_constraint_key,
217+
sycl::ext::oneapi::experimental::latency_constraint_key::value_t<
218+
0, sycl::ext::oneapi::experimental::latency_control_type::none,
219+
0>>::type;
220+
static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
221+
static constexpr int32_t _target_anchor = _latency_constraint_prop::target;
222+
static constexpr sycl::ext::oneapi::experimental::latency_control_type
223+
_control_type = _latency_constraint_prop::type;
224+
static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
225+
226+
int32_t _control_type_code =
227+
0; // Default
228+
// sycl::ext::oneapi::experimental::latency_control_type::none
229+
if constexpr (_control_type == sycl::ext::oneapi::experimental::
230+
latency_control_type::exact) {
231+
_control_type_code = 1;
232+
} else if constexpr (_control_type == sycl::ext::oneapi::experimental::
233+
latency_control_type::max) {
234+
_control_type_code = 2;
235+
} else if constexpr (_control_type == sycl::ext::oneapi::experimental::
236+
latency_control_type::min) {
237+
_control_type_code = 3;
228238
}
229239

230-
*__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, _type,
231-
_cycle) = Val;
240+
*__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
241+
_control_type_code, _relative_cycle) = Val;
232242
#else
233243
*Ptr = Val;
234244
#endif
235245
}
246+
247+
template <typename _T, access::address_space _space, typename _propertiesT>
248+
static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val,
249+
_propertiesT Properties) {
250+
store<_T, _space, _propertiesT>(Ptr, Val);
251+
}
236252
...
237253
private:
238254
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
@@ -253,7 +269,7 @@ private:
253269
254270
### Usage
255271
```c++
256-
// Added in version 2 of this extension.
272+
// Added in version 3 of this extension.
257273
#include <sycl/ext/intel/fpga_extensions.hpp>
258274
...
259275
sycl::buffer<int, 1> output_buffer(output_data, 1);
@@ -277,14 +293,15 @@ Queue.submit([&](sycl::handler &cgh) {
277293
sycl::ext::intel::experimental::statically_coalesce<false>>;
278294
279295
// The following load is anchor 1
280-
int Z = ExpPrefetchingLSU::load<
281-
sycl::ext::intel::experimental::latency_anchor_id<1>>(input_ptr + 2);
296+
int Z = ExpPrefetchingLSU::load(
297+
input_ptr + 2, sycl::ext::oneapi::experimental::properties(
298+
sycl::ext::oneapi::experimental::latency_anchor_id<1>));
282299
283300
// The following store occurs exactly 5 cycles after the anchor 1 read
284-
ExpBurstCoalescedLSU::store<
285-
sycl::ext::intel::experimental::latency_constraint<
286-
1, sycl::ext::intel::experimental::type::exact, 5>>(output_ptr + 2,
287-
Z);
301+
ExpBurstCoalescedLSU::store(output_ptr + 2, Z,
302+
sycl::ext::oneapi::experimental::properties(
303+
sycl::ext::oneapi::experimental::latency_constraint<
304+
1, sycl::ext::oneapi::experimental::latency_control_type::exact, 5>));
288305
});
289306
});
290307
...
@@ -304,3 +321,4 @@ extension's APIs the implementation supports.
304321
|:---- |:---------:|
305322
|1 |Initial extension version. Base features are supported.|
306323
|2 |Add experimental latency control API.|
324+
|3 |Update the experimental latency control API to use property list.|

0 commit comments

Comments
 (0)