Skip to content

Commit f578eef

Browse files
authored
[SYCL][ESIMD][NFC] Add comments for 2d block load/store unpadding (#9403)
1 parent a3e4b9e commit f578eef

File tree

1 file changed

+23
-2
lines changed
  • sycl/include/sycl/ext/intel/experimental/esimd

1 file changed

+23
-2
lines changed

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,6 +2037,11 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
20372037
detail::check_lsc_cache_hint<detail::lsc_action::load, L1H, L3H>();
20382038
detail::check_lsc_block_2d_restrictions<T, BlockWidth, BlockHeight, NBlocks,
20392039
Transposed, Transformed>();
2040+
// For Load BlockWidth is padded up to the next power-of-two value.
2041+
// For Load with Transpose the pre-operation BlockHeight is padded up
2042+
// to the next power-of-two value.
2043+
// For Load with Transform the pre-operation BlockHeight is padded up to
2044+
// a multiple of K, where K = 4B / sizeof(T).
20402045
constexpr int ElemsPerDword = 4 / sizeof(T);
20412046
constexpr int GRFRowSize = Transposed ? BlockHeight
20422047
: Transformed ? BlockWidth * ElemsPerDword
@@ -2075,7 +2080,21 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
20752080
return Raw;
20762081
} else {
20772082
// HW restrictions force data which is read to contain padding filled with
2078-
// garbage for 2d lsc loads. This code eliminates such padding.
2083+
// zeros for 2d lsc loads. This code eliminates such padding.
2084+
2085+
// For example, 2D block load of 6 elements of 1 byte data type will
2086+
// take 8 bytes per row for each block.
2087+
//
2088+
// +----+----+----+----+----+----+-----+-----+
2089+
// | 00 | 01 | 02 | 03 | 04 | 05 | 06* | 07* |
2090+
// +----+----+----+----+----+----+-----+-----+
2091+
// | 10 | 11 | 12 | 13 | 14 | 15 | 16* | 17* |
2092+
// +----+----+----+----+----+----+-----+-----+
2093+
// | 20 | 21 | 22 | 23 | 24 | 25 | 26* | 27* |
2094+
// +----+----+----+----+----+----+-----+-----+
2095+
// | 30 | 31 | 32 | 33 | 34 | 35 | 36* | 37* |
2096+
// +----+----+----+----+----+----+-----+-----+
2097+
// * signifies the padded element.
20792098

20802099
__ESIMD_NS::simd<T, DstElements> Dst;
20812100

@@ -2209,6 +2228,8 @@ __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
22092228
if constexpr (BlockHeight * Pitch == N) {
22102229
Raw = Vals;
22112230
} else {
2231+
// For store with padding, allocate the block with padding, and place
2232+
// original data there.
22122233
auto Data2D = Vals.template bit_cast_view<T, BlockHeight, BlockWidth>();
22132234
auto Raw2D = Raw.template bit_cast_view<T, BlockHeight, Pitch>();
22142235
Raw2D.template select<BlockHeight, 1, BlockWidth, 1>(0, 0) = Data2D;
@@ -2508,7 +2529,7 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
25082529
return Raw;
25092530
} else {
25102531
// HW restrictions force data which is read to contain padding filled with
2511-
// garbage for 2d lsc loads. This code eliminates such padding.
2532+
// zeros for 2d lsc loads. This code eliminates such padding.
25122533

25132534
__ESIMD_NS::simd<T, DstElements> Dst;
25142535

0 commit comments

Comments
 (0)