Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][ESIMD] Test for a fix of error when a scalar offset is provided as a parameter to the API #1534

Merged
merged 14 commits into from
Jan 31, 2023
Merged
21 changes: 20 additions & 1 deletion SYCL/ESIMD/api/svm_gather_scatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ template <typename T, int N> bool test(queue &Q) {
try {
Q.submit([&](handler &CGH) {
CGH.parallel_for(sycl::range<1>{1}, [=](id<1>) SYCL_ESIMD_KERNEL {
#ifndef USE_SCALAR_OFFSET
simd<Toffset, N> Offsets(0u, sizeof(T));
#else
Toffset Offsets = 0;
#endif
scatter<T, N>(Dst, Offsets, gather<T, N>(Src, Offsets));
});
}).wait();
Expand All @@ -72,7 +76,11 @@ template <typename T, int N> bool test(queue &Q) {

unsigned NumErrs = 0;
for (int I = 0; I < N; ++I)
#ifndef USE_SCALAR_OFFSET
if (Dst[I] != Src[I])
#else
if ((Dst[I] != Src[I] && I == 0) || (I != 0 && Dst[I] != 0))
#endif
if (++NumErrs <= 10)
std::cout << "failed at " << I << ": " << Dst[I]
<< " (Dst) != " << Src[I] << " (Src)\n";
Expand All @@ -90,48 +98,59 @@ int main(void) {
bool Pass = true;

Pass &= test<int8_t, 1>(Q);
#ifndef USE_SCALAR_OFFSET

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: This situation reveals that this test is written in a bad way: gather and scatter are used in one kernel and gather is input for gather. This makes it impossible to test gather with scalar offset and N>1 and it creates opportunity for double-error (1 in gather, 1 in scatter) giving a pass.
I had some changes in my local ws splitting this test to gather and scatter.

Pass &= test<int8_t, 2>(Q);
Pass &= test<int8_t, 4>(Q);
Pass &= test<int8_t, 8>(Q);
Pass &= test<int8_t, 16>(Q);
Pass &= test<int8_t, 32>(Q);
#endif

Pass &= test<int16_t, 1>(Q);
#ifndef USE_SCALAR_OFFSET
Pass &= test<int16_t, 2>(Q);
Pass &= test<int16_t, 4>(Q);
Pass &= test<int16_t, 8>(Q);
Pass &= test<int16_t, 16>(Q);
Pass &= test<int16_t, 32>(Q);
#endif

Pass &= test<int32_t, 1>(Q);
#ifndef USE_SCALAR_OFFSET
Pass &= test<int32_t, 2>(Q);
Pass &= test<int32_t, 4>(Q);
Pass &= test<int32_t, 8>(Q);
Pass &= test<int32_t, 16>(Q);
Pass &= test<int32_t, 32>(Q);

#endif
if (Dev.has(aspect::fp16)) {
Pass &= test<half, 1>(Q);
#ifndef USE_SCALAR_OFFSET
Pass &= test<half, 2>(Q);
Pass &= test<half, 4>(Q);
Pass &= test<half, 8>(Q);
Pass &= test<half, 16>(Q);
Pass &= test<half, 32>(Q);
#endif
}

Pass &= test<bfloat16, 1>(Q);
#ifndef USE_SCALAR_OFFSET
Pass &= test<bfloat16, 2>(Q);
Pass &= test<bfloat16, 4>(Q);
Pass &= test<bfloat16, 8>(Q);
Pass &= test<bfloat16, 16>(Q);
Pass &= test<bfloat16, 32>(Q);
#endif
#ifdef USE_TF32
Pass &= test<tfloat32, 1>(Q);
#ifndef USE_SCALAR_OFFSET
Pass &= test<tfloat32, 2>(Q);
Pass &= test<tfloat32, 4>(Q);
Pass &= test<tfloat32, 8>(Q);
Pass &= test<tfloat32, 16>(Q);
Pass &= test<tfloat32, 32>(Q);
#endif
#endif

std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
Expand Down
18 changes: 18 additions & 0 deletions SYCL/ESIMD/api/svm_gather_scatter_scalar_off.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//==------ svm_gather_scatter_scalar_off.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu && !gpu-intel-pvc
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// Regression test for gather/scatter API.
// scalar offset variant of the test - uses scalar offset.

#define USE_SCALAR_OFFSET

#include "svm_gather_scatter.cpp"
27 changes: 27 additions & 0 deletions SYCL/ESIMD/dword_atomic_cmpxchg_scalar_off.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//==-- dword_atomic_cmpxchg_scalar_off.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This test checks LSC atomic operations.
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// UNSUPPORTED: cuda || hip
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
// UNSUPPORTED: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This macro enables only cmpxch tests. They may require more time to execute,
// and have higher probablity to hit kernel execution time limit, so they are
// separated.
#define CMPXCHG_TEST

// This macro enforces usage of dword atomics in the included test.
#define USE_DWORD_ATOMICS
// This macro enforces scalar offset variant of the test - uses scalar offsets.
#define USE_SCALAR_OFFSET

#include "lsc/atomic_smoke.cpp"
21 changes: 21 additions & 0 deletions SYCL/ESIMD/dword_atomic_smoke_scalar_off.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//==--- dword_atomic_smoke_scalar_off.cpp - DPC++ ESIMD on-device test ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This test checks LSC atomic operations.
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
// UNSUPPORTED: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// scalar offset variant of the test - uses scalar offsets.

#define USE_DWORD_ATOMICS
#define USE_SCALAR_OFFSET

#include "lsc/atomic_smoke.cpp"
13 changes: 13 additions & 0 deletions SYCL/ESIMD/lsc/Inputs/lsc_usm_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,11 @@ bool test(uint32_t pmask = 0xffffffff) {
lsc_block_store<T, VS, lsc_data_size::default_size>(
out + elem_off, vals);
} else {
#ifndef USE_SCALAR_OFFSET
simd<Toffset, VL> offset(byte_off, VS * sizeof(T));
#else
Toffset offset = byte_off;
#endif
simd_mask<VL> pred;
for (int i = 0; i < VL; i++)
pred.template select<1, 1>(i) = (pmask >> i) & 1;
Expand Down Expand Up @@ -142,8 +146,17 @@ bool test(uint32_t pmask = 0xffffffff) {
for (int i = 0; i < Size; i++) {
Tuint in_val = sycl::bit_cast<Tuint>(in[i]);
Tuint out_val = sycl::bit_cast<Tuint>(out[i]);
#ifndef USE_SCALAR_OFFSET
Tuint e = (pmask >> ((i / VS) % VL)) & 1 ? in_val & vmask
: sycl::bit_cast<Tuint>(old_val);
#else
// Calculate the mask to identify the areas that were actually updated
constexpr uint16_t mask =
1U << ((sycl::bit_cast<uint32_t>((float)VL) >> 23) - 126);
Tuint e = ((i / VS) % VL == 0) && (pmask >> ((i / VS) % VL)) & (mask - 1)
? in_val & vmask
: sycl::bit_cast<Tuint>(old_val);
#endif
if (out_val != e) {
passed = false;
std::cout << "out[" << i << "] = 0x" << std::hex << out_val
Expand Down
23 changes: 23 additions & 0 deletions SYCL/ESIMD/lsc/atomic_cmpxchg_scalar_off.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//==---- atomic_cmpxchg_scalar_off.cpp - DPC++ ESIMD on-device test ----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This test checks LSC compare-and-exchange atomic operations.
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
// UNSUPPORTED: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This macro enables only cmpxch tests. They may require more time to execute,
// and have higher probablity to hit kernel execution time limit, so they are
// separated.
#define CMPXCHG_TEST
// This macro enforces scalar offset variant of the test - uses scalar offsets.
#define USE_SCALAR_OFFSET

#include "atomic_smoke.cpp"
54 changes: 54 additions & 0 deletions SYCL/ESIMD/lsc/atomic_smoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
// TODO: esimd_emulator doesn't support xchg operation
// UNSUPPORTED: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand Down Expand Up @@ -216,8 +217,12 @@ bool test(queue q, const Config &cfg) {
cgh.parallel_for<TestID<T, N, ImplF>>(
rng, [=](id<1> ii) SYCL_ESIMD_KERNEL {
int i = ii;
#ifndef USE_SCALAR_OFFSET
simd<Toffset, N> offsets(cfg.start_ind * sizeof(T),
cfg.stride * sizeof(T));
#else
Toffset offsets = 0;
#endif
simd_mask<N> m = 1;
m[cfg.masked_lane] = 0;
// barrier to achieve better contention:
Expand Down Expand Up @@ -318,8 +323,14 @@ template <class T, int N> struct ImplInc {
static T init(int i, const Config &cfg) { return (T)0; }

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg)
? (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups)
#else
T gold =
i == 0
? (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups * (N - 1))
#endif
: init(i, cfg);
return gold;
}
Expand All @@ -331,11 +342,20 @@ template <class T, int N> struct ImplDec {
static constexpr int base = 5;

static T init(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups + base);
#else
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups * (N - 1) +
base);
#endif
}

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg) ? (T)base : init(i, cfg);
#else
T gold = i == 0 ? (T)base : init(i, cfg);
#endif
return gold;
}
};
Expand Down Expand Up @@ -364,7 +384,11 @@ template <class T, int N> struct ImplStore {
static T init(int i, const Config &cfg) { return 0; }

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg) ? base : init(i, cfg);
#else
T gold = i == 0 ? base : init(i, cfg);
#endif
return gold;
}

Expand All @@ -378,9 +402,15 @@ template <class T, int N, class C, C Op> struct ImplAdd {
static T init(int i, const Config &cfg) { return 0; }

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg) ? (T)(cfg.repeat * cfg.threads_per_group *
cfg.n_groups * (T)(1 + FPDELTA))
: init(i, cfg);
#else
T gold = i == 0 ? (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups *
(N - 1) * (T)(1 + FPDELTA))
: init(i, cfg);
#endif
return gold;
}

Expand All @@ -393,13 +423,23 @@ template <class T, int N, class C, C Op> struct ImplSub {
static constexpr T base = (T)(5 + FPDELTA);

static T init(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups *
(T)(1 + FPDELTA) +
base);
#else
return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups * (N - 1) *
(T)(1 + FPDELTA) +
base);
#endif
}

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg) ? base : init(i, cfg);
#else
T gold = i == 0 ? base : init(i, cfg);
#endif
return gold;
}

Expand All @@ -416,7 +456,11 @@ template <class T, int N, class C, C Op> struct ImplMin {
}

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg) ? (T)MIN : init(i, cfg);
#else
T gold = i == 0 ? (T)MIN : init(i, cfg);
#endif
return gold;
}

Expand All @@ -431,7 +475,11 @@ template <class T, int N, class C, C Op> struct ImplMax {
static T init(int i, const Config &cfg) { return (T)FPDELTA; }

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg)
#else
T gold = i == 0
#endif
? (T)(cfg.threads_per_group * cfg.n_groups - 1 + FPDELTA)
: init(i, cfg);
return gold;
Expand Down Expand Up @@ -482,7 +530,11 @@ template <class T, int N, class C, C Op> struct ImplCmpxchgBase {
static T init(int i, const Config &cfg) { return base - 1; }

static T gold(int i, const Config &cfg) {
#ifndef USE_SCALAR_OFFSET
T gold = is_updated(i, N, cfg)
#else
T gold = i == 0
#endif
? (T)(cfg.threads_per_group * cfg.n_groups - 1 + base)
: init(i, cfg);
return gold;
Expand Down Expand Up @@ -606,12 +658,14 @@ int main(void) {

// Check load/store operations
passed &= test_int_types<8, ImplLoad>(q, cfg);
#ifndef USE_SCALAR_OFFSET
if (q.get_backend() != sycl::backend::ext_intel_esimd_emulator)
passed &= test_int_types<8, ImplStore>(q, cfg);
#ifndef USE_DWORD_ATOMICS
if (q.get_backend() != sycl::backend::ext_intel_esimd_emulator)
passed &= test<float, 8, ImplStore>(q, cfg);
#endif // USE_DWORD_ATOMICS
#endif

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit (here and in few other places):

Suggested change
#endif
#endif // !USE_SCALAR_OFFSET

// TODO: check double other vector lengths in LSC mode.

std::cout << (passed ? "Passed\n" : "FAILED\n");
Expand Down
21 changes: 21 additions & 0 deletions SYCL/ESIMD/lsc/atomic_smoke_scalar_off.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//==---------------- atomic_smoke_scalar_off.cpp - DPC++ ESIMD on-device test
//-----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This test checks LSC atomic operations.
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
// UNSUPPORTED: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// scalar offset variant of the test - uses scalar offsets.

#define USE_SCALAR_OFFSET

#include "atomic_smoke.cpp"
Loading