Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 04204e7

Browse files
committed
[ESIMD] Add a test for lsc_slm_block_load with merging semantics
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent d61a45f commit 04204e7

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,17 @@ bool test(queue Q, uint32_t PMask = ~0) {
7777
barrier();
7878

7979
if constexpr (Transpose) {
80-
auto Vals = lsc_slm_block_load<T, VL, DS>(LID * VL * sizeof(T));
81-
Vals.copy_to(Out + GID * VL);
80+
if constexpr (TestMergeOperand) {
81+
simd_mask<1> Pred =
82+
(GID & 0x1) == 0; // Only work-items with an even GID load; others keep OldValues.
83+
simd<T, VL> OldValues(GID, 1);
84+
auto Vals = lsc_slm_block_load<T, VL, DS>(LID * VL * sizeof(T), Pred,
85+
OldValues);
86+
Vals.copy_to(Out + GID * VL);
87+
} else {
88+
auto Vals = lsc_slm_block_load<T, VL, DS>(LID * VL * sizeof(T));
89+
Vals.copy_to(Out + GID * VL);
90+
}
8291
} else {
8392
simd<uint32_t, VL> Offsets(LID * VL * NChannels * sizeof(T),
8493
NChannels * sizeof(T));
@@ -111,7 +120,12 @@ bool test(queue Q, uint32_t PMask = ~0) {
111120
for (uint32_t I = 0; I < OutSize; I++) {
112121
uint32_t GroupId = I / (LocalRange * VL * NChannels);
113122
uint32_t LID = I % (LocalRange * VL * NChannels);
123+
uint32_t GID = I / VL;
124+
bool Pred = (GID & 0x1) == 0;
114125
T ExpectedVal = GroupId * 1000000 + LID;
126+
if (TestMergeOperand && !Pred)
127+
ExpectedVal = GID + (I % VL);
128+
115129
if (Out[I] != ExpectedVal && NErrors++ < 32) {
116130
std::cout << "Error: " << I << ": Value = " << Out[I]
117131
<< ", Expected value = " << ExpectedVal << std::endl;

SYCL/ESIMD/lsc/lsc_slm_block_load.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,10 @@
22
// RUN: %clangxx -fsycl %s -o %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
44

5-
// This test verifies the correctness of LSC intrinsics loading
6-
// from SLM memory.
5+
// This test verifies the correctness of LSC SLM block load intrinsics.
76

87
#include "Inputs/lsc_slm_load.hpp"
98

10-
// This test verifies the correctness of LSC SLM block load intrinsics.
11-
129
template <typename T, bool TestMerging> bool test_load(queue Q) {
1310
constexpr bool Transpose = true;
1411
constexpr int VS = 1;
@@ -36,10 +33,8 @@ int main() {
3633
Passed &= test_load<uint32_t, !TestMerging>(Q);
3734
Passed &= test_load<uint64_t, !TestMerging>(Q);
3835

39-
// TODO: Enable the test with 'TestMerging' when lsc_slm_block_load() with
40-
// 'old_values' operand is supported.
41-
// Passed &= test_load<uint32_t, TestMerging>(Q);
42-
// Passed &= test_load<uint64_t, TestMerging>(Q);
36+
Passed &= test_load<uint32_t, TestMerging>(Q);
37+
Passed &= test_load<uint64_t, TestMerging>(Q);
4338

4439
// TODO: Enable the test with 1- and 2-byte element types and with floating-point
4540
// types when lsc_slm_block_load() API is ready.

0 commit comments

Comments
 (0)