Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit de3ccba

Browse files
authored
[SYCL] add ESIMD tests (#29)
* add ESIMD tests * add ESIMD-specific LIT substitutions * align execution rules with llvm-test-suite
1 parent 7fd62f4 commit de3ccba

33 files changed

+6665
-4
lines changed

SYCL/ESIMD/BitonicSortK.cpp

Lines changed: 686 additions & 0 deletions
Large diffs are not rendered by default.

SYCL/ESIMD/BitonicSortKv2.cpp

Lines changed: 601 additions & 0 deletions
Large diffs are not rendered by default.

SYCL/ESIMD/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Overview
2+
This directory contains ESIMD tests which are run on Intel GPU device only.
3+
Some of them can run on host device too, but in general it is not always
4+
possible as some of ESIMD APIs (e.g. memory access via accessors) is not
5+
implemented for the host device.

SYCL/ESIMD/Stencil.cpp

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
//==---------------- stencil.cpp - DPC++ ESIMD on-device test ------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// TODO enable on Windows and Level Zero
9+
// REQUIRES: linux && gpu && opencl
10+
// RUN: %clangxx-esimd -fsycl %s -o %t.out
11+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
12+
// RUN: %ESIMD_RUN_PLACEHOLDER %t.out
13+
14+
#include "esimd_test_utils.hpp"
15+
16+
#include <CL/sycl.hpp>
17+
#include <CL/sycl/INTEL/esimd.hpp>
18+
#include <iostream>
19+
20+
//
21+
// test smaller input size
22+
// test 8x16 block size
23+
//
24+
#define DIM_SIZE (1 << 10)
25+
#define SQUARE_SZ (DIM_SIZE * DIM_SIZE)
26+
27+
#define WIDTH 16
28+
#define HEIGHT 16
29+
30+
using namespace cl::sycl;
31+
32+
void InitializeSquareMatrix(float *matrix, size_t const Dim,
33+
bool const bSkipDataGeneration) {
34+
memset(matrix, 0, Dim * Dim * sizeof(float));
35+
if (!bSkipDataGeneration) {
36+
for (unsigned int iRow = 0; iRow < Dim; ++iRow) {
37+
for (unsigned int iCol = 0; iCol < Dim; ++iCol) {
38+
matrix[iRow * Dim + iCol] = static_cast<float>(iRow + iCol);
39+
}
40+
}
41+
}
42+
}
43+
44+
bool CheckResults(float *out, float *in) {
45+
unsigned int n = DIM_SIZE;
46+
for (unsigned int i = 0; i < n; i++) {
47+
for (unsigned int j = 0; j < n; j++) {
48+
if ((5 <= i) && (i < n - 5) && (5 <= j) && (j < n - 5)) {
49+
float res = +in[(i - 5) * n + (j + 0)] * -0.02f +
50+
in[(i - 4) * n + (j + 0)] * -0.025f +
51+
in[(i - 3) * n + (j + 0)] * -0.0333333333333f +
52+
in[(i - 2) * n + (j + 0)] * -0.05f +
53+
in[(i - 1) * n + (j + 0)] * -0.1f +
54+
in[(i + 0) * n + (j - 5)] * -0.02f +
55+
in[(i + 0) * n + (j - 4)] * -0.025f +
56+
in[(i + 0) * n + (j - 3)] * -0.0333333333333f +
57+
in[(i + 0) * n + (j - 2)] * -0.05f +
58+
in[(i + 0) * n + (j - 1)] * -0.1f +
59+
in[(i + 0) * n + (j + 1)] * 0.1f +
60+
in[(i + 0) * n + (j + 2)] * 0.05f +
61+
in[(i + 0) * n + (j + 3)] * 0.0333333333333f +
62+
in[(i + 0) * n + (j + 4)] * 0.025f +
63+
in[(i + 0) * n + (j + 5)] * 0.02f +
64+
in[(i + 1) * n + (j + 0)] * 0.1f +
65+
in[(i + 2) * n + (j + 0)] * 0.05f +
66+
in[(i + 3) * n + (j + 0)] * 0.0333333333333f +
67+
in[(i + 4) * n + (j + 0)] * 0.025f +
68+
in[(i + 5) * n + (j + 0)] * 0.02f;
69+
70+
// check result
71+
if (abs(res - out[i * n + j]) >= 0.0015f) {
72+
std::cout << "out[" << i << "][" << j << "] = " << out[i * n + j]
73+
<< " expect result " << res << std::endl;
74+
return false;
75+
}
76+
}
77+
}
78+
}
79+
return true;
80+
}
81+
82+
int main(void) {
83+
uint range_width =
84+
(DIM_SIZE - 10) / WIDTH + (((DIM_SIZE - 10) % WIDTH == 0) ? 0 : 1);
85+
uint range_height =
86+
(DIM_SIZE - 10) / HEIGHT + (((DIM_SIZE - 10) % HEIGHT == 0) ? 0 : 1);
87+
cl::sycl::range<2> GlobalRange{range_width, range_height};
88+
89+
std::cout << "width = " << range_width << " height = " << range_height
90+
<< std::endl;
91+
cl::sycl::range<2> LocalRange{1, 1};
92+
93+
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
94+
95+
auto dev = q.get_device();
96+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
97+
auto ctxt = q.get_context();
98+
99+
// create and init matrices
100+
float *inputMatrix =
101+
static_cast<float *>(malloc_shared(SQUARE_SZ * sizeof(float), dev, ctxt));
102+
float *outputMatrix =
103+
static_cast<float *>(malloc_shared(SQUARE_SZ * sizeof(float), dev, ctxt));
104+
InitializeSquareMatrix(inputMatrix, DIM_SIZE, false);
105+
InitializeSquareMatrix(outputMatrix, DIM_SIZE, true);
106+
107+
auto e = q.submit([&](handler &cgh) {
108+
cgh.parallel_for<class Stencil_kernel>(
109+
GlobalRange * LocalRange, [=](item<2> it) SYCL_ESIMD_KERNEL {
110+
using namespace sycl::INTEL::gpu;
111+
uint h_pos = it.get_id(0);
112+
uint v_pos = it.get_id(1);
113+
114+
simd<float, (HEIGHT + 10) * 32> vin;
115+
// matrix HEIGHT+10 x 32
116+
auto in = vin.format<float, HEIGHT + 10, 32>();
117+
118+
//
119+
// rather than loading all data in
120+
// the code will interleave data loading and compute
121+
// first, we load enough data for the first 16 pixels
122+
//
123+
unsigned off = (v_pos * HEIGHT) * DIM_SIZE + h_pos * WIDTH;
124+
#pragma unroll
125+
for (unsigned i = 0; i < 10; i++) {
126+
in.row(i) = block_load<float, 32>(inputMatrix + off);
127+
off += DIM_SIZE;
128+
}
129+
130+
unsigned out_off =
131+
(((v_pos * HEIGHT + 5) * DIM_SIZE + (h_pos * WIDTH) + 5)) *
132+
sizeof(float);
133+
simd<unsigned, WIDTH> elm16(0, 1);
134+
135+
#pragma unroll
136+
for (unsigned i = 0; i < HEIGHT; i++) {
137+
138+
in.row(10 + i) = block_load<float, 32>(inputMatrix + off);
139+
off += DIM_SIZE;
140+
141+
simd<float, WIDTH> sum =
142+
in.row(i + 0).select<WIDTH, 1>(5) * -0.02f +
143+
in.row(i + 1).select<WIDTH, 1>(5) * -0.025f +
144+
in.row(i + 2).select<WIDTH, 1>(5) * -0.0333333333333f +
145+
in.row(i + 3).select<WIDTH, 1>(5) * -0.05f +
146+
in.row(i + 4).select<WIDTH, 1>(5) * -0.1f +
147+
in.row(i + 6).select<WIDTH, 1>(5) * 0.1f +
148+
in.row(i + 7).select<WIDTH, 1>(5) * 0.05f +
149+
in.row(i + 8).select<WIDTH, 1>(5) * 0.0333333333333f +
150+
in.row(i + 9).select<WIDTH, 1>(5) * 0.025f +
151+
in.row(i + 10).select<WIDTH, 1>(5) * 0.02f +
152+
in.row(i + 5).select<WIDTH, 1>(0) * -0.02f +
153+
in.row(i + 5).select<WIDTH, 1>(1) * -0.025f +
154+
in.row(i + 5).select<WIDTH, 1>(2) * -0.0333333333333f +
155+
in.row(i + 5).select<WIDTH, 1>(3) * -0.05f +
156+
in.row(i + 5).select<WIDTH, 1>(4) * -0.1f +
157+
in.row(i + 5).select<WIDTH, 1>(6) * 0.1f +
158+
in.row(i + 5).select<WIDTH, 1>(7) * 0.05f +
159+
in.row(i + 5).select<WIDTH, 1>(8) * 0.0333333333333f +
160+
in.row(i + 5).select<WIDTH, 1>(9) * 0.025f +
161+
in.row(i + 5).select<WIDTH, 1>(10) * 0.02f;
162+
163+
// predciate output
164+
simd<ushort, WIDTH> p = (elm16 + h_pos * WIDTH) < DIM_SIZE - 10;
165+
166+
simd<unsigned, WIDTH> elm16_off = elm16 * sizeof(float) + out_off;
167+
scatter<float, WIDTH>(outputMatrix, sum, elm16_off, p);
168+
out_off += DIM_SIZE * sizeof(float);
169+
170+
if (v_pos * HEIGHT + 10 + i >= DIM_SIZE - 1)
171+
break;
172+
}
173+
});
174+
});
175+
e.wait();
176+
177+
// check result
178+
bool passed = CheckResults(outputMatrix, inputMatrix);
179+
if (passed) {
180+
std::cout << "PASSED" << std::endl;
181+
} else {
182+
std::cout << "FAILED" << std::endl;
183+
}
184+
free(inputMatrix, ctxt);
185+
free(outputMatrix, ctxt);
186+
return 0;
187+
}

SYCL/ESIMD/accessor.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//==---------------- accessor.cpp - DPC++ ESIMD on-device test ------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// TODO enable on Windows and Level Zero
9+
// REQUIRES: linux && gpu && opencl
10+
// RUN: %clangxx-esimd -fsycl -D_CRT_SECURE_NO_WARNINGS=1 %s -o %t.out
11+
// RUN: %ESIMD_RUN_PLACEHOLDER %t.out
12+
13+
// This test checks that accessor-based memory accesses work correctly in ESIMD.
14+
15+
#include "esimd_test_utils.hpp"
16+
17+
#include <CL/sycl.hpp>
18+
#include <CL/sycl/INTEL/esimd.hpp>
19+
20+
#include <iostream>
21+
22+
using namespace cl::sycl;
23+
24+
constexpr unsigned int VL = 1024 * 128;
25+
26+
using Ty = float;
27+
28+
int main() {
29+
Ty data0[VL] = {0};
30+
Ty data1[VL] = {0};
31+
constexpr Ty VAL = 5;
32+
33+
for (int i = 0; i < VL; i++) {
34+
data0[i] = i;
35+
}
36+
37+
try {
38+
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
39+
40+
buffer<Ty, 1> buf0(data0, range<1>(VL));
41+
buffer<Ty, 1> buf1(data1, range<1>(VL));
42+
43+
q.submit([&](handler &cgh) {
44+
std::cout << "Running on "
45+
<< q.get_device().get_info<cl::sycl::info::device::name>()
46+
<< "\n";
47+
48+
auto acc0 = buf0.get_access<access::mode::read_write>(cgh);
49+
auto acc1 = buf1.get_access<access::mode::write>(cgh);
50+
51+
cgh.parallel_for<class Test>(
52+
range<1>(1), [=](sycl::id<1> i) SYCL_ESIMD_KERNEL {
53+
using namespace sycl::INTEL::gpu;
54+
unsigned int offset = 0;
55+
for (int k = 0; k < VL / 16; k++) {
56+
simd<Ty, 16> var = block_load<Ty, 16>(acc0, offset);
57+
var += VAL;
58+
block_store(acc0, offset, var);
59+
block_store(acc1, offset, var + 1);
60+
offset += 64;
61+
}
62+
});
63+
});
64+
65+
q.wait();
66+
67+
} catch (cl::sycl::exception const &e) {
68+
std::cout << "SYCL exception caught: " << e.what() << '\n';
69+
return 2;
70+
}
71+
72+
int err_cnt = 0;
73+
74+
for (int i = 0; i < VL; i++) {
75+
Ty gold0 = i + VAL;
76+
Ty gold1 = gold0 + 1;
77+
Ty val0 = data0[i];
78+
Ty val1 = data1[i];
79+
80+
if (val0 != gold0) {
81+
if (++err_cnt < 10)
82+
std::cerr << "*** ERROR at data0[" << i << "]: " << val0
83+
<< " != " << gold0 << "(gold)\n";
84+
}
85+
if (val1 != gold1) {
86+
if (++err_cnt < 10)
87+
std::cerr << "*** ERROR at data1[" << i << "]: " << val1
88+
<< " != " << gold1 << "(gold)\n";
89+
}
90+
}
91+
if (err_cnt == 0) {
92+
std::cout << "Passed\n";
93+
return 0;
94+
} else {
95+
std::cout << "Failed: " << err_cnt << " of " << VL << " errors\n";
96+
return 1;
97+
}
98+
}

0 commit comments

Comments
 (0)