Skip to content

Commit 27c9546

Browse files
authored
[SYCL][ESIMD] Implement scatter for local accessors accepting compile time properties (intel#12675)
1 parent f316273 commit 27c9546

File tree

6 files changed

+542
-5
lines changed

6 files changed

+542
-5
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 193 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4378,9 +4378,8 @@ slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals,
43784378
/// void slm_scatter(
43794379
/// OffsetSimdViewT byte_offsets, simd<T, N> vals,
43804380
/// PropertyListT props = {}); // (slm-sc-4)
4381-
/// Loads ("gathers") elements of the type 'T' from Shared Local Memory
4382-
/// locations addressed by byte offsets \p byte_offsets, and returns the loaded
4383-
/// elements.
4381+
/// Stores ("scatters") elements of the type 'T' to Shared Local Memory
4382+
/// locations addressed by byte offsets \p byte_offsets.
43844383
/// @tparam T Element type.
43854384
/// @tparam N Number of elements to write.
43864385
/// @tparam VS Vector size. It can also be read as the number of writes per each
@@ -8034,6 +8033,196 @@ __ESIMD_API
80348033
offsets + glob_offset + __ESIMD_DNS::localAccessorToOffset(acc), mask);
80358034
}
80368035

8036+
/// Variant of scatter that uses local accessor as a parameter
8037+
/// template <typename T, int N, int VS = 1, typename AccessorT,
8038+
/// typename PropertyListT = empty_properties_t>
8039+
/// void scatter(AccessorT acc,
8040+
/// simd<uint32_t, N / VS> byte_offsets,
8041+
/// simd<T, N> vals,
8042+
/// simd_mask<N / VS> mask,
8043+
/// PropertyListT props = {}); // (lacc-sc-1)
8044+
8045+
/// template <typename T, int N, int VS = 1, typename AccessorT,
8046+
/// typename PropertyListT = empty_properties_t>
8047+
/// void scatter(AccessorT acc,
8048+
/// simd<uint32_t, N / VS> byte_offsets,
8049+
/// simd<T, N> vals,
8050+
/// PropertyListT props = {}); // (lacc-sc-2)
8051+
8052+
/// The next two functions are similar to lacc-sc-{1,2} with the 'byte_offsets'
8053+
/// parameter represented as 'simd_view'.
8054+
8055+
/// template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8056+
/// typename AccessorT,
8057+
/// typename PropertyListT = empty_properties_t>
8058+
/// void scatter(AccessorT acc,
8059+
/// OffsetSimdViewT byte_offsets,
8060+
/// simd<T, N> vals,
8061+
/// simd_mask<N / VS> mask,
8062+
/// PropertyListT props = {}); // (lacc-sc-3)
8063+
8064+
/// template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8065+
/// typename AccessorT,
8066+
/// typename PropertyListT = empty_properties_t>
8067+
/// void scatter(AccessorT acc,
8068+
/// OffsetSimdViewT byte_offsets,
8069+
/// simd<T, N> vals,
8070+
/// PropertyListT props = {}); // (lacc-sc-4)
8071+
8072+
/// template <typename T, int N, int VS = 1, typename AccessorT,
8073+
/// typename PropertyListT = empty_properties_t>
8074+
/// void scatter(AccessorT acc,
8075+
/// simd<uint32_t, N / VS> byte_offsets,
8076+
/// simd<T, N> vals,
8077+
/// simd_mask<N / VS> mask,
8078+
/// PropertyListT props = {}); // (lacc-sc-1)
8079+
///
8080+
/// Writes ("scatters") elements of the input vector to memory locations
8081+
/// addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8082+
/// Access to any element's memory location can be disabled via
8083+
/// the input mask.
8084+
/// @tparam T Element type.
8085+
/// @tparam N Number of elements to write.
8086+
/// @tparam VS Vector size. It can also be read as the number of writes per each
8087+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8088+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
8089+
/// @param acc The accessor to scatter to.
8090+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
8091+
/// For each i, (acc base + byte_offsets[i]) must be element size aligned.
8092+
/// If the alignment property is not passed, then it is assumed that each
8093+
/// accessed address is aligned by element-size.
8094+
/// @param vals The vector to scatter.
8095+
/// @param mask The access mask.
8096+
/// @param props The optional compile-time properties. Only 'alignment'
8097+
/// property is used.
8098+
template <typename T, int N, int VS = 1, typename AccessorT,
8099+
typename PropertyListT =
8100+
ext::oneapi::experimental::detail::empty_properties_t>
8101+
// SFINAE guard: this overload (lacc-sc-1) is viable only when AccessorT
// satisfies is_local_accessor_with_v<..., can_write> and PropertyListT
// satisfies is_property_list_v. enable_if_t with no second argument yields
// 'void', so the function returns nothing.
__ESIMD_API std::enable_if_t<
8102+
detail::is_local_accessor_with_v<AccessorT,
8103+
detail::accessor_mode_cap::can_write> &&
8104+
ext::oneapi::experimental::is_property_list_v<PropertyListT>>
8105+
scatter(AccessorT acc, simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals,
8106+
simd_mask<N / VS> mask, PropertyListT props = {}) {
8107+
// Shift the caller's offsets by localAccessorToOffset(acc) and forward them,
// together with vals, mask and props, to slm_scatter with the same T/N/VS.
slm_scatter<T, N, VS>(byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc),
8108+
vals, mask, props);
8109+
}
8110+
8111+
/// template <typename T, int N, int VS = 1, typename AccessorT,
8112+
/// typename PropertyListT = empty_properties_t>
8113+
/// void scatter(AccessorT acc,
8114+
/// simd<uint32_t, N / VS> byte_offsets,
8115+
/// simd<T, N> vals,
8116+
/// PropertyListT props = {}); // (lacc-sc-2)
8117+
///
8118+
/// Writes ("scatters") elements of the input vector to memory locations
8119+
/// addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8120+
/// @tparam T Element type.
8121+
/// @tparam N Number of elements to write.
8122+
/// @tparam VS Vector size. It can also be read as the number of writes per each
8123+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8124+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
8125+
/// @param acc The accessor to scatter to.
8126+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
8127+
/// For each i, (acc base + byte_offsets[i]) must be element size aligned.
8128+
/// If the alignment property is not passed, then it is assumed that each
8129+
/// accessed address is aligned by element-size.
8130+
/// @param vals The vector to scatter.
8131+
/// @param props The optional compile-time properties. Only 'alignment'
8132+
/// property is used.
8133+
template <typename T, int N, int VS = 1, typename AccessorT,
8134+
typename PropertyListT =
8135+
ext::oneapi::experimental::detail::empty_properties_t>
8136+
// SFINAE guard: this overload (lacc-sc-2) is viable only for a writable local
// accessor type and a property-list type; the enable_if_t result type is
// 'void'.
__ESIMD_API std::enable_if_t<
8137+
detail::is_local_accessor_with_v<AccessorT,
8138+
detail::accessor_mode_cap::can_write> &&
8139+
ext::oneapi::experimental::is_property_list_v<PropertyListT>>
8140+
scatter(AccessorT acc, simd<uint32_t, N / VS> byte_offsets, simd<T, N> vals,
8141+
PropertyListT props = {}) {
8142+
// This overload takes no mask: build one initialized from 1 (presumably
// enabling every one of the N / VS addresses -- confirm against simd_mask)
// and delegate to the scatter overload that accepts a mask.
simd_mask<N / VS> Mask = 1;
8143+
scatter<T, N, VS>(acc, byte_offsets, vals, Mask, props);
8144+
}
8145+
8146+
/// template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8147+
/// typename AccessorT,
8148+
/// typename PropertyListT = empty_properties_t>
8149+
/// void scatter(AccessorT acc,
8150+
/// OffsetSimdViewT byte_offsets,
8151+
/// simd<T, N> vals,
8152+
/// simd_mask<N / VS> mask,
8153+
/// PropertyListT props = {}); // (lacc-sc-3)
8154+
///
8155+
/// Writes ("scatters") elements of the input vector to memory locations
8156+
/// addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8157+
/// Access to any element's memory location can be disabled via the input mask.
8158+
/// @tparam T Element type.
8159+
/// @tparam N Number of elements to write.
8160+
/// @tparam VS Vector size. It can also be read as the number of writes per each
8161+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8162+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
8163+
/// @param acc The accessor to scatter to.
8164+
/// @param byte_offsets the vector of 32-bit offsets in bytes
8165+
/// represented as a 'simd_view' object.
8166+
/// For each i, (acc base + byte_offsets[i]) must be element size aligned.
8167+
/// If the alignment property is not passed, then it is assumed that each
8168+
/// accessed address is aligned by element-size.
8169+
/// @param vals The vector to scatter.
8170+
/// @param mask The access mask.
8171+
/// @param props The optional compile-time properties. Only 'alignment'
8172+
/// property is used.
8173+
// Note the template parameter order: OffsetSimdViewT comes before AccessorT.
template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8174+
typename AccessorT,
8175+
typename PropertyListT =
8176+
ext::oneapi::experimental::detail::empty_properties_t>
8177+
// SFINAE guard: this overload (lacc-sc-3) is viable only when AccessorT is a
// writable local accessor type, OffsetSimdViewT satisfies
// is_simd_view_type_v, and PropertyListT is a property list; the enable_if_t
// result type is 'void'.
__ESIMD_API std::enable_if_t<
8178+
detail::is_local_accessor_with_v<AccessorT,
8179+
detail::accessor_mode_cap::can_write> &&
8180+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
8181+
ext::oneapi::experimental::is_property_list_v<PropertyListT>>
8182+
scatter(AccessorT acc, OffsetSimdViewT byte_offsets, simd<T, N> vals,
8183+
simd_mask<N / VS> mask, PropertyListT props = {}) {
8184+
// Pass the result of byte_offsets.read() (presumably the view's contents as
// a simd vector -- confirm against simd_view) to the scatter overload that
// takes offsets by value, forwarding vals, mask and props unchanged.
scatter<T, N, VS>(acc, byte_offsets.read(), vals, mask, props);
8185+
}
8186+
8187+
/// template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8188+
/// typename AccessorT,
8189+
/// typename PropertyListT = empty_properties_t>
8190+
/// void scatter(AccessorT acc,
8191+
/// OffsetSimdViewT byte_offsets,
8192+
/// simd<T, N> vals,
8193+
/// PropertyListT props = {}); // (lacc-sc-4)
8194+
///
8195+
/// Writes ("scatters") elements of the input vector to memory locations
8196+
/// addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8197+
/// @tparam T Element type.
8198+
/// @tparam N Number of elements to write.
8199+
/// @tparam VS Vector size. It can also be read as the number of writes per each
8200+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8201+
/// only on DG2 and PVC and only for 4- and 8-byte element vectors.
8202+
/// @param acc The accessor to scatter to.
8203+
/// @param byte_offsets the vector of 32-bit offsets in bytes
8204+
/// represented as a 'simd_view' object.
8205+
/// For each i, (acc base + byte_offsets[i]) must be element size aligned.
8206+
/// If the alignment property is not passed, then it is assumed that each
8207+
/// accessed address is aligned by element-size.
8208+
/// @param vals The vector to scatter.
8209+
/// @param props The optional compile-time properties. Only 'alignment'
8210+
/// property is used.
8211+
// Note the template parameter order: OffsetSimdViewT comes before AccessorT.
template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8212+
typename AccessorT,
8213+
typename PropertyListT =
8214+
ext::oneapi::experimental::detail::empty_properties_t>
8215+
// SFINAE guard: this overload (lacc-sc-4) is viable only when AccessorT is a
// writable local accessor type, OffsetSimdViewT satisfies
// is_simd_view_type_v, and PropertyListT is a property list; the enable_if_t
// result type is 'void'.
__ESIMD_API std::enable_if_t<
8216+
detail::is_local_accessor_with_v<AccessorT,
8217+
detail::accessor_mode_cap::can_write> &&
8218+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
8219+
ext::oneapi::experimental::is_property_list_v<PropertyListT>>
8220+
scatter(AccessorT acc, OffsetSimdViewT byte_offsets, simd<T, N> vals,
8221+
PropertyListT props = {}) {
8222+
// This overload takes no mask: build one initialized from 1 (presumably
// enabling every one of the N / VS addresses -- confirm against simd_mask),
// then delegate with the result of byte_offsets.read() to the scatter
// overload that accepts a mask.
simd_mask<N / VS> Mask = 1;
8223+
scatter<T, N, VS>(acc, byte_offsets.read(), vals, Mask, props);
8224+
}
8225+
80378226
/// Variant of scatter that uses local accessor as a parameter
80388227
///
80398228
/// Writes elements of a \ref simd object into an accessor at given offsets.
@@ -8056,7 +8245,7 @@ template <typename T, int N, typename AccessorTy>
80568245
__ESIMD_API std::enable_if_t<detail::is_local_accessor_with_v<
80578246
AccessorTy, detail::accessor_mode_cap::can_write>>
80588247
scatter(AccessorTy acc, simd<uint32_t, N> offsets, simd<T, N> vals,
8059-
uint32_t glob_offset = 0, simd_mask<N> mask = 1) {
8248+
uint32_t glob_offset, simd_mask<N> mask = 1) {
80608249
slm_scatter<T, N>(offsets + glob_offset +
80618250
__ESIMD_DNS::localAccessorToOffset(acc),
80628251
vals, mask);

sycl/test-e2e/ESIMD/unified_memory_api/Inputs/gather.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,6 @@ bool testLACC(queue Q, uint32_t MaskStride, PropertiesT) {
796796
In[I] = esimd_test::getRandomValue<T>();
797797

798798
try {
799-
buffer<T, 1> InBuf(In, Size * 2);
800799
Q.submit([&](handler &CGH) {
801800
// Allocate a bit more to safely initialize it with 8-element chunks.
802801
constexpr uint32_t SLMSize = (Threads * N + 8) * sizeof(T);

0 commit comments

Comments
 (0)