@@ -2037,6 +2037,11 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
2037
2037
detail::check_lsc_cache_hint<detail::lsc_action::load, L1H, L3H>();
2038
2038
detail::check_lsc_block_2d_restrictions<T, BlockWidth, BlockHeight, NBlocks,
2039
2039
Transposed, Transformed>();
2040
+ // For Load BlockWidth is padded up to the next power-of-two value.
2041
+ // For Load with Transpose the pre-operation BlockHeight is padded up
2042
+ // to the next power-of-two value.
2043
+ // For Load with Transform the pre-operation BlockHeight is padded up to
2044
+ // a multiple of K, where K = 4B / sizeof(T).
2040
2045
constexpr int ElemsPerDword = 4 / sizeof (T);
2041
2046
constexpr int GRFRowSize = Transposed ? BlockHeight
2042
2047
: Transformed ? BlockWidth * ElemsPerDword
@@ -2075,7 +2080,21 @@ lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
2075
2080
return Raw;
2076
2081
} else {
2077
2082
// HW restrictions force data which is read to contain padding filled with
2078
- // garbage for 2d lsc loads. This code eliminates such padding.
2083
+ // zeros for 2d lsc loads. This code eliminates such padding.
2084
+
2085
+ // For example, 2D block load of 5 elements of 1 byte data type will
2086
+ // take 8 bytes per row for each block.
2087
+ //
2088
+ // +----+----+----+----+----+-----+-----+-----+
2089
+ // | 00 | 01 | 02 | 03 | 04 | 05* | 06* | 07* |
2090
+ // +----+----+----+----+----+-----+-----+-----+
2091
+ // | 10 | 11 | 12 | 13 | 14 | 15* | 16* | 17* |
2092
+ // +----+----+----+----+----+-----+-----+-----+
2093
+ // | 20 | 21 | 22 | 23 | 24 | 25* | 26* | 27* |
2094
+ // +----+----+----+----+----+-----+-----+-----+
2095
+ // | 30 | 31 | 32 | 33 | 34 | 35* | 36* | 37* |
2096
+ // +----+----+----+----+----+-----+-----+-----+
2097
+ // * signifies the padded element.
2079
2098
2080
2099
__ESIMD_NS::simd<T, DstElements> Dst;
2081
2100
@@ -2209,6 +2228,8 @@ __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
2209
2228
if constexpr (BlockHeight * Pitch == N) {
2210
2229
Raw = Vals;
2211
2230
} else {
2231
+ // For a store with padding, allocate the padded block and copy the
2232
+ // original data into its unpadded region.
2212
2233
auto Data2D = Vals.template bit_cast_view <T, BlockHeight, BlockWidth>();
2213
2234
auto Raw2D = Raw.template bit_cast_view <T, BlockHeight, Pitch>();
2214
2235
Raw2D.template select <BlockHeight, 1 , BlockWidth, 1 >(0 , 0 ) = Data2D;
@@ -2508,7 +2529,7 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
2508
2529
return Raw;
2509
2530
} else {
2510
2531
// HW restrictions force data which is read to contain padding filled with
2511
- // garbage for 2d lsc loads. This code eliminates such padding.
2532
+ // zeros for 2d lsc loads. This code eliminates such padding.
2512
2533
2513
2534
__ESIMD_NS::simd<T, DstElements> Dst;
2514
2535
0 commit comments