@@ -4378,9 +4378,8 @@ slm_scatter(OffsetSimdViewT byte_offsets, simd<T, N> vals,
4378
4378
// / void slm_scatter(
4379
4379
// / OffsetSimdViewT byte_offsets, simd<T, N> vals,
4380
4380
// / PropertyListT props = {}); // (slm-sc-4)
4381
- // / Loads ("gathers") elements of the type 'T' from Shared Local Memory
4382
- // / locations addressed by byte offsets \p byte_offsets, and returns the loaded
4383
- // / elements.
4381
+ // / Stores ("scatters") elements of the type 'T' to Shared Local Memory
4382
+ // / locations addressed by byte offsets \p byte_offsets.
4384
4383
// / @tparam T Element type.
4385
4384
// / @tparam N Number of elements to write.
4386
4385
// / @tparam VS Vector size. It can also be read as the number of writes per each
@@ -8034,6 +8033,196 @@ __ESIMD_API
8034
8033
offsets + glob_offset + __ESIMD_DNS::localAccessorToOffset (acc), mask);
8035
8034
}
8036
8035
8036
+ // / Variant of scatter that uses local accessor as a parameter
8037
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
8038
+ // / typename PropertyListT = empty_properties_t>
8039
+ // / void scatter(AccessorT acc,
8040
+ // / simd<uint32_t, N / VS> byte_offsets,
8041
+ // / simd<T, N> vals,
8042
+ // / simd_mask<N / VS> mask,
8043
+ // / PropertyListT props = {}); // (lacc-sc-1)
8044
+
8045
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
8046
+ // / typename PropertyListT = empty_properties_t>
8047
+ // / void scatter(AccessorT acc,
8048
+ // / simd<uint32_t, N / VS> byte_offsets,
8049
+ // / simd<T, N> vals,
8050
+ // / PropertyListT props = {}); // (lacc-sc-2)
8051
+
8052
+ // / The next two functions are similar to lacc-sc-{1,2} with the 'byte_offsets'
8053
+ // / parameter represented as 'simd_view'.
8054
+
8055
+ // / template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8056
+ // / typename AccessorT,
8057
+ // / typename PropertyListT = empty_properties_t>
8058
+ // / void scatter(AccessorT acc,
8059
+ // / OffsetSimdViewT byte_offsets,
8060
+ // / simd<T, N> vals,
8061
+ // / simd_mask<N / VS> mask,
8062
+ // / PropertyListT props = {}); // (lacc-sc-3)
8063
+
8064
+ // / template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8065
+ // / typename AccessorT,
8066
+ // / typename PropertyListT = empty_properties_t>
8067
+ // / void scatter(AccessorT acc,
8068
+ // / OffsetSimdViewT byte_offsets,
8069
+ // / simd<T, N> vals,
8070
+ // / PropertyListT props = {}); // (lacc-sc-4)
8071
+
8072
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
8073
+ // / typename PropertyListT = empty_properties_t>
8074
+ // / void scatter(AccessorT acc,
8075
+ // / simd<uint32_t, N / VS> byte_offsets,
8076
+ // / simd<T, N> vals,
8077
+ // / simd_mask<N / VS> mask,
8078
+ // / PropertyListT props = {}); // (lacc-sc-1)
8079
+ // /
8080
+ // / Writes ("scatters") elements of the input vector to memory locations
8081
+ // / addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8082
+ // / Access to any element's memory location can be disabled via
8083
+ // / the input mask.
8084
+ // / @tparam T Element type.
8085
+ // / @tparam N Number of elements to write.
8086
+ // / @tparam VS Vector size. It can also be read as the number of writes per each
8087
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8088
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
8089
+ // / @param acc The accessor to scatter to.
8090
+ // / @param byte_offsets the vector of 32-bit offsets in bytes.
8091
+ // / For each i, (base of \p acc + byte_offsets[i]) must be element size aligned.
8092
+ // / If the alignment property is not passed, then it is assumed that each
8093
+ // / accessed address is aligned by element-size.
8094
+ // / @param vals The vector to scatter.
8095
+ // / @param mask The access mask.
8096
+ // / @param props The optional compile-time properties. Only 'alignment'
8097
+ // / property is used.
8098
+ template <typename T, int N, int VS = 1 , typename AccessorT,
8099
+ typename PropertyListT =
8100
+ ext::oneapi::experimental::detail::empty_properties_t >
8101
+ __ESIMD_API std::enable_if_t < // no type argument given: the overload returns void when enabled
8102
+ detail::is_local_accessor_with_v<AccessorT,
8103
+ detail::accessor_mode_cap::can_write> && // AccessorT must satisfy the local-accessor check with can_write
8104
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>> // and props must be a property list
8105
+ scatter (AccessorT acc, simd<uint32_t , N / VS> byte_offsets, simd<T, N> vals,
8106
+ simd_mask<N / VS> mask, PropertyListT props = {}) {
8107
+ slm_scatter<T, N, VS>(byte_offsets + __ESIMD_DNS::localAccessorToOffset (acc), // shift every offset by the accessor's offset, then delegate to slm_scatter
8108
+ vals, mask, props);
8109
+ }
8110
+
8111
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
8112
+ // / typename PropertyListT = empty_properties_t>
8113
+ // / void scatter(AccessorT acc,
8114
+ // / simd<uint32_t, N / VS> byte_offsets,
8115
+ // / simd<T, N> vals,
8116
+ // / PropertyListT props = {}); // (lacc-sc-2)
8117
+ // /
8118
+ // / Writes ("scatters") elements of the input vector to memory locations
8119
+ // / addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8120
+ // / @tparam T Element type.
8121
+ // / @tparam N Number of elements to write.
8122
+ // / @tparam VS Vector size. It can also be read as the number of writes per each
8123
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8124
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
8125
+ // / @param acc The accessor to scatter to.
8126
+ // / @param byte_offsets the vector of 32-bit offsets in bytes.
8127
+ // / For each i, (base of \p acc + byte_offsets[i]) must be element size aligned.
8128
+ // / If the alignment property is not passed, then it is assumed that each
8129
+ // / accessed address is aligned by element-size.
8130
+ // / @param vals The vector to scatter.
8131
+ // / @param props The optional compile-time properties. Only 'alignment'
8132
+ // / property is used.
8133
+ template <typename T, int N, int VS = 1 , typename AccessorT,
8134
+ typename PropertyListT =
8135
+ ext::oneapi::experimental::detail::empty_properties_t >
8136
+ __ESIMD_API std::enable_if_t < // no type argument given: the overload returns void when enabled
8137
+ detail::is_local_accessor_with_v<AccessorT,
8138
+ detail::accessor_mode_cap::can_write> && // AccessorT must satisfy the local-accessor check with can_write
8139
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>> // and props must be a property list
8140
+ scatter (AccessorT acc, simd<uint32_t , N / VS> byte_offsets, simd<T, N> vals,
8141
+ PropertyListT props = {}) {
8142
+ simd_mask<N / VS> Mask = 1 ; // mask built from 1 (presumably broadcast to every lane, i.e. nothing is masked off)
8143
+ scatter<T, N, VS>(acc, byte_offsets, vals, Mask, props); // reuse the overload that takes an explicit mask
8144
+ }
8145
+
8146
+ // / template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8147
+ // / typename AccessorT,
8148
+ // / typename PropertyListT = empty_properties_t>
8149
+ // / void scatter(AccessorT acc,
8150
+ // / OffsetSimdViewT byte_offsets,
8151
+ // / simd<T, N> vals,
8152
+ // / simd_mask<N / VS> mask,
8153
+ // / PropertyListT props = {}); // (lacc-sc-3)
8154
+ // /
8155
+ // / Writes ("scatters") elements of the input vector to memory locations
8156
+ // / addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8157
+ // / Access to any element's memory location can be disabled via the input mask.
8158
+ // / @tparam T Element type.
8159
+ // / @tparam N Number of elements to write.
8160
+ // / @tparam VS Vector size. It can also be read as the number of writes per each
8161
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8162
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
8163
+ // / @param acc The accessor to scatter to.
8164
+ // / @param byte_offsets the vector of 32-bit offsets in bytes
8165
+ // / represented as a 'simd_view' object.
8166
+ // / For each i, (base of \p acc + byte_offsets[i]) must be element size aligned.
8167
+ // / If the alignment property is not passed, then it is assumed that each
8168
+ // / accessed address is aligned by element-size.
8169
+ // / @param vals The vector to scatter.
8170
+ // / @param mask The access mask.
8171
+ // / @param props The optional compile-time properties. Only 'alignment'
8172
+ // / property is used.
8173
+ template <typename T, int N, int VS = 1 , typename OffsetSimdViewT,
8174
+ typename AccessorT,
8175
+ typename PropertyListT =
8176
+ ext::oneapi::experimental::detail::empty_properties_t >
8177
+ __ESIMD_API std::enable_if_t < // no type argument given: the overload returns void when enabled
8178
+ detail::is_local_accessor_with_v<AccessorT,
8179
+ detail::accessor_mode_cap::can_write> && // AccessorT must satisfy the local-accessor check with can_write
8180
+ detail::is_simd_view_type_v<OffsetSimdViewT> && // offsets must be passed as a simd_view type
8181
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>> // and props must be a property list
8182
+ scatter (AccessorT acc, OffsetSimdViewT byte_offsets, simd<T, N> vals,
8183
+ simd_mask<N / VS> mask, PropertyListT props = {}) {
8184
+ scatter<T, N, VS>(acc, byte_offsets.read (), vals, mask, props); // read() result is forwarded (presumably as a simd vector, selecting the simd-offset overload)
8185
+ }
8186
+
8187
+ // / template <typename T, int N, int VS = 1, typename OffsetSimdViewT,
8188
+ // / typename AccessorT,
8189
+ // / typename PropertyListT = empty_properties_t>
8190
+ // / void scatter(AccessorT acc,
8191
+ // / OffsetSimdViewT byte_offsets,
8192
+ // / simd<T, N> vals,
8193
+ // / PropertyListT props = {}); // (lacc-sc-4)
8194
+ // /
8195
+ // / Writes ("scatters") elements of the input vector to memory locations
8196
+ // / addressed by the local accessor \p acc and byte offsets \p byte_offsets.
8197
+ // / @tparam T Element type.
8198
+ // / @tparam N Number of elements to write.
8199
+ // / @tparam VS Vector size. It can also be read as the number of writes per each
8200
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
8201
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
8202
+ // / @param acc The accessor to scatter to.
8203
+ // / @param byte_offsets the vector of 32-bit offsets in bytes
8204
+ // / represented as a 'simd_view' object.
8205
+ // / For each i, (base of \p acc + byte_offsets[i]) must be element size aligned.
8206
+ // / If the alignment property is not passed, then it is assumed that each
8207
+ // / accessed address is aligned by element-size.
8208
+ // / @param vals The vector to scatter.
8209
+ // / @param props The optional compile-time properties. Only 'alignment'
8210
+ // / property is used.
8211
+ template <typename T, int N, int VS = 1 , typename OffsetSimdViewT,
8212
+ typename AccessorT,
8213
+ typename PropertyListT =
8214
+ ext::oneapi::experimental::detail::empty_properties_t >
8215
+ __ESIMD_API std::enable_if_t < // no type argument given: the overload returns void when enabled
8216
+ detail::is_local_accessor_with_v<AccessorT,
8217
+ detail::accessor_mode_cap::can_write> && // AccessorT must satisfy the local-accessor check with can_write
8218
+ detail::is_simd_view_type_v<OffsetSimdViewT> && // offsets must be passed as a simd_view type
8219
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>> // and props must be a property list
8220
+ scatter (AccessorT acc, OffsetSimdViewT byte_offsets, simd<T, N> vals,
8221
+ PropertyListT props = {}) {
8222
+ simd_mask<N / VS> Mask = 1 ; // mask built from 1 (presumably broadcast to every lane, i.e. nothing is masked off)
8223
+ scatter<T, N, VS>(acc, byte_offsets.read (), vals, Mask, props); // read() result plus explicit mask are forwarded to another scatter overload
8224
+ }
8225
+
8037
8226
// / Variant of scatter that uses local accessor as a parameter
8038
8227
// /
8039
8228
// / Writes elements of a \ref simd object into an accessor at given offsets.
@@ -8056,7 +8245,7 @@ template <typename T, int N, typename AccessorTy>
8056
8245
__ESIMD_API std::enable_if_t <detail::is_local_accessor_with_v<
8057
8246
AccessorTy, detail::accessor_mode_cap::can_write>>
8058
8247
scatter (AccessorTy acc, simd<uint32_t , N> offsets, simd<T, N> vals,
8059
- uint32_t glob_offset = 0 , simd_mask<N> mask = 1 ) {
8248
+ uint32_t glob_offset, simd_mask<N> mask = 1 ) {
8060
8249
slm_scatter<T, N>(offsets + glob_offset +
8061
8250
__ESIMD_DNS::localAccessorToOffset (acc),
8062
8251
vals, mask);
0 commit comments