Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][ESIMD] Add tests for ESIMD functions accepting 64 bit offsets #1385

Merged
merged 11 commits into from
Dec 14, 2022
2 changes: 1 addition & 1 deletion SYCL/ESIMD/Stencil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ int main(int argc, char *argv[]) {

simd<unsigned, WIDTH> elm16_off =
elm16 * sizeof(float) + out_off;
scatter<float, WIDTH>(outputMatrix, sum, elm16_off, p);
scatter<float, WIDTH>(outputMatrix, elm16_off, sum, p);
out_off += DIM_SIZE * sizeof(float);

if (v_pos * HEIGHT + 10 + i >= DIM_SIZE - 1)
Expand Down
11 changes: 9 additions & 2 deletions SYCL/ESIMD/api/esimd_rgba_smoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

#include <iostream>

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

using namespace sycl;
using namespace sycl::ext::intel::esimd;

Expand Down Expand Up @@ -71,7 +77,8 @@ bool test_impl(queue q) {

std::cout << "Testing mask=";
print_mask(ChMask);
std::cout << ", T=" << typeid(T).name() << ", NPixels=" << NPixels << "\n";
std::cout << ", T=" << esimd_test::type_name<T>() << ", NPixels=" << NPixels
<< "\n";

T *A = malloc_shared<T>(Size, q);
T *B = malloc_shared<T>(Size, q);
Expand All @@ -92,7 +99,7 @@ bool test_impl(queue q) {
cgh.single_task<TestID<T, NPixels, static_cast<int>(ChMask)>>(
[=]() SYCL_ESIMD_KERNEL {
constexpr unsigned NElems = NPixels * NOnChs;
simd<unsigned int, NPixels> offsets(0, sizeof(T) * NAllChs);
simd<Toffset, NPixels> offsets(0, sizeof(T) * NAllChs);
simd<T, NElems> p = gather_rgba<ChMask>(A, offsets);
// simply scatter back to B - should give same results as A in
// enabled channels, the rest should remain zero:
Expand Down
18 changes: 18 additions & 0 deletions SYCL/ESIMD/api/esimd_rgba_smoke_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//==---------------- esimd_rgba_smoke_64.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// Smoke test for scatter/gather also illustrating correct use of these APIs
// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "esimd_rgba_smoke.cpp"
8 changes: 7 additions & 1 deletion SYCL/ESIMD/api/svm_gather_scatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::bfloat16;
using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

template <typename T, int N> bool test(queue &Q) {
std::cout << " Running " << esimd_test::type_name<T>() << " test, N=" << N
<< "...\n";
Expand Down Expand Up @@ -55,7 +61,7 @@ template <typename T, int N> bool test(queue &Q) {
try {
Q.submit([&](handler &CGH) {
CGH.parallel_for(sycl::range<1>{1}, [=](id<1>) SYCL_ESIMD_KERNEL {
simd<uint32_t, N> Offsets(0u, sizeof(T));
simd<Toffset, N> Offsets(0u, sizeof(T));
scatter<T, N>(Dst, Offsets, gather<T, N>(Src, Offsets));
});
}).wait();
Expand Down
18 changes: 18 additions & 0 deletions SYCL/ESIMD/api/svm_gather_scatter_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//==---------- svm_gather_scatter_64.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu && !gpu-intel-pvc
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// Regression test for gather/scatter API.
// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "svm_gather_scatter.cpp"
18 changes: 18 additions & 0 deletions SYCL/ESIMD/api/svm_gather_scatter_pvc_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//==---------- svm_gather_scatter_pvc_64.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// Regression test for SVM gather/scatter API.
// PVC variant of the test - adds tfloat32 and uses 64 bit offsets.

#define USE_TF32
#define USE_64_BIT_OFFSET

#include "svm_gather_scatter.cpp"
8 changes: 7 additions & 1 deletion SYCL/ESIMD/histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ using namespace sycl;
#define BLOCK_WIDTH 32
#define BLOCK_HEIGHT 64

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

void histogram_CPU(unsigned int width, unsigned int height, unsigned char *srcY,
unsigned int *cpuHistogram) {
int i;
Expand Down Expand Up @@ -191,7 +197,7 @@ int main(int argc, char *argv[]) {
}

// Declare a vector to store the offset for atomic write operation
simd<unsigned int, 8> offset(0, 1); // init to 0, 1, 2, ..., 7
simd<Toffset, 8> offset(0, 1); // init to 0, 1, 2, ..., 7
offset *= sizeof(unsigned int);

// Update global sum by atomically adding each local histogram
Expand Down
8 changes: 7 additions & 1 deletion SYCL/ESIMD/histogram_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ using namespace sycl;
#define BLOCK_WIDTH 32
#define BLOCK_HEIGHT 64

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

void histogram_CPU(unsigned int width, unsigned int height, unsigned char *srcY,
unsigned int *cpuHistogram) {
int i;
Expand Down Expand Up @@ -193,7 +199,7 @@ int main(int argc, char *argv[]) {
}

// Declare a vector to store the offset for atomic write operation
simd<unsigned int, 8> offset(0, 1); // init to 0, 1, 2, ..., 7
simd<Toffset, 8> offset(0, 1); // init to 0, 1, 2, ..., 7
offset *= sizeof(unsigned int);

// Update global sum by atomically adding each local histogram
Expand Down
17 changes: 17 additions & 0 deletions SYCL/ESIMD/histogram_2d_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==---------------- histogram_2d_64.cpp - DPC++ ESIMD on-device test -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "histogram_2d.cpp"
17 changes: 17 additions & 0 deletions SYCL/ESIMD/histogram_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==---------------- histogram_64.cpp - DPC++ ESIMD on-device test ------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "histogram.cpp"
8 changes: 7 additions & 1 deletion SYCL/ESIMD/lsc/Inputs/lsc_usm_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ using namespace sycl;
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
uint16_t VL, uint16_t VS, bool transpose,
lsc_data_size DS = lsc_data_size::default_size,
Expand Down Expand Up @@ -89,7 +95,7 @@ bool test(uint32_t pmask = 0xffffffff) {
lsc_block_store<T, VS, lsc_data_size::default_size>(
out + elem_off, vals);
} else {
simd<uint32_t, VL> offset(byte_off, VS * sizeof(T));
simd<Toffset, VL> offset(byte_off, VS * sizeof(T));
simd_mask<VL> pred;
for (int i = 0; i < VL; i++)
pred.template select<1, 1>(i) = (pmask >> i) & 1;
Expand Down
8 changes: 7 additions & 1 deletion SYCL/ESIMD/lsc/Inputs/lsc_usm_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ using namespace sycl;
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

template <int case_num, typename T, uint32_t Groups, uint32_t Threads,
uint16_t VL, uint16_t VS, bool transpose,
lsc_data_size DS = lsc_data_size::default_size,
Expand Down Expand Up @@ -78,7 +84,7 @@ bool test(uint32_t pmask = 0xffffffff) {
simd<T, VS> vals(new_val + elem_off, 1);
lsc_block_store<T, VS, DS, L1H, L3H>(out + elem_off, vals);
} else {
simd<uint32_t, VL> offset(byte_off, VS * sizeof(T));
simd<Toffset, VL> offset(byte_off, VS * sizeof(T));
simd_mask<VL> pred;
for (int i = 0; i < VL; i++)
pred.template select<1, 1>(i) = (pmask >> i) & 1;
Expand Down
10 changes: 8 additions & 2 deletions SYCL/ESIMD/lsc/atomic_smoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ using namespace sycl;
using namespace sycl::ext::intel::esimd;
using namespace sycl::ext::intel::experimental::esimd;

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

struct Config {
int threads_per_group;
int n_groups;
Expand Down Expand Up @@ -210,8 +216,8 @@ bool test(queue q, const Config &cfg) {
cgh.parallel_for<TestID<T, N, ImplF>>(
rng, [=](id<1> ii) SYCL_ESIMD_KERNEL {
int i = ii;
simd<unsigned, N> offsets(cfg.start_ind * sizeof(T),
cfg.stride * sizeof(T));
simd<Toffset, N> offsets(cfg.start_ind * sizeof(T),
cfg.stride * sizeof(T));
simd_mask<N> m = 1;
m[cfg.masked_lane] = 0;
// barrier to achieve better contention:
Expand Down
20 changes: 20 additions & 0 deletions SYCL/ESIMD/lsc/atomic_smoke_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//==---------------- atomic_smoke_64.cpp - DPC++ ESIMD on-device test -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This test checks LSC atomic operations.
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// TODO: esimd_emulator fails due to random timeouts (_XFAIL_: esimd_emulator)
// UNSUPPORTED: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "atomic_smoke.cpp"
10 changes: 8 additions & 2 deletions SYCL/ESIMD/lsc/lsc_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

#ifdef USE_64_BIT_OFFSET
typedef uint64_t Toffset;
#else
typedef uint32_t Toffset;
#endif

int main() {
using namespace sycl;
using namespace sycl::ext::intel::esimd;
Expand Down Expand Up @@ -48,8 +54,8 @@ int main() {
h.parallel_for<class SimplestKernel>(
range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL {
auto offset = id[0] * SIMDSize;
auto offsets = simd<uint32_t, SIMDSize>(id * SIMDSize * sizeof(int),
sizeof(int));
auto offsets = simd<Toffset, SIMDSize>(id * SIMDSize * sizeof(int),
sizeof(int));
auto pred = simd_mask<SIMDSize>(1);
auto add = simd<int, SIMDSize>(5);
auto compare = simd<int, SIMDSize>(id * SIMDSize, 1);
Expand Down
16 changes: 16 additions & 0 deletions SYCL/ESIMD/lsc/lsc_usm_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//==------------ lsc_usm_64.cpp - DPC++ ESIMD on-device test --------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc || esimd_emulator
// UNSUPPORTED: cuda || hip
// TODO : esimd_emulator does not support lsc-atomic yet
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

#define USE_64_BIT_OFFSET

#include "lsc_usm.cpp"
18 changes: 18 additions & 0 deletions SYCL/ESIMD/lsc/lsc_usm_atomic_cachehint_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//==--- lsc_usm_atomic_cachehint_64.cpp - DPC++ ESIMD on-device test -------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc || esimd_emulator
// UNSUPPORTED: cuda || hip
// TODO : Test uses 'kernel_bundle' that is not supported in ESIMD_EMULATOR
// XFAIL: esimd_emulator
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "lsc_usm_atomic_cachehint.cpp"
17 changes: 17 additions & 0 deletions SYCL/ESIMD/lsc/lsc_usm_load_u16u32_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==------- lsc_usm_load_u16u32_64.cpp - DPC++ ESIMD on-device test --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc || esimd_emulator
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "lsc_usm_load_u16u32.cpp"
17 changes: 17 additions & 0 deletions SYCL/ESIMD/lsc/lsc_usm_load_u32_64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==------- lsc_usm_load_u32_64.cpp - DPC++ ESIMD on-device test -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc || esimd_emulator
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// 64 bit offset variant of the test - uses 64 bit offsets.

#define USE_64_BIT_OFFSET

#include "lsc_usm_load_u32.cpp"
Loading