Skip to content

Commit 7af64df

Browse files
[SYCL][E2E] Introduce performance tests (#12372)
This adds a directory under sycl/test-e2e whose tests are only executed when llvm-lit is invoked with "--param enable-perf-tests=1", and changes the post-commit task to run those tests on a single worker and capture their output even when they PASS. In a separate PR, I plan to modify pre-commit to run the same tests when a specific label is provided. Better infrastructure, such as using Google Benchmark or deeper integration with CI, is not planned at the moment.
1 parent 250f5df commit 7af64df

File tree

4 files changed

+97
-1
lines changed

4 files changed

+97
-1
lines changed

.github/workflows/sycl_post_commit.yml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,33 @@ jobs:
4646
- name: Intel Arc A-Series Graphics with Level Zero
4747
runner: '["Linux", "arc"]'
4848
extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True
49+
# Performance tests below. Specifics:
50+
# - only run performance tests (use LIT_FILTER env)
51+
# - ask llvm-lit to show all the output, even for PASS (-a)
52+
# - run in single thread (-j 1)
53+
# - enable the tests in LIT (--param enable-perf-tests=True)
54+
# - run on all available devices.
55+
- name: Perf tests on Intel GEN12 Graphics system
56+
runner: '["Linux", "gen12"]'
57+
env: '{"LIT_FILTER":"PerformanceTests/"}'
58+
extra_lit_opts: -a -j 1 --param enable-perf-tests=True
59+
target_devices: all
60+
- name: Perf tests on Intel Arc A-Series Graphics system
61+
runner: '["Linux", "arc"]'
62+
env: '{"LIT_FILTER":"PerformanceTests/"}'
63+
extra_lit_opts: -a -j 1 --param enable-perf-tests=True
64+
target_devices: all
4965
uses: ./.github/workflows/sycl_linux_run_tests.yml
5066
with:
5167
name: ${{ matrix.name }}
5268
runner: ${{ matrix. runner }}
5369
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
5470
image_options: -u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN
55-
target_devices: ext_oneapi_level_zero:gpu
71+
target_devices: ${{ matrix.target_devices || 'ext_oneapi_level_zero:gpu' }}
5672
reset_gpu: true
5773

5874
extra_lit_opts: ${{ matrix.extra_lit_opts }}
75+
env: ${{ matrix.env || '{}' }}
5976

6077
ref: ${{ github.sha }}
6178
merge_ref: ''
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
#include <sycl/sycl.hpp>
5+
6+
#include <sycl/ext/oneapi/experimental/user_defined_reductions.hpp>
7+
8+
#include <iomanip>
9+
10+
using namespace sycl;
11+
namespace sycl_exp = sycl::ext::oneapi::experimental;
12+
13+
template <typename T> void test() {
14+
std::cout << std::setw(50) << __PRETTY_FUNCTION__ << ", time:";
15+
constexpr int WG_SIZE = 32 + 16 + 8 + 4;
16+
constexpr int GLOBAL_SIZE = WG_SIZE * 1;
17+
18+
queue q;
19+
20+
buffer<T, 1> b{GLOBAL_SIZE};
21+
22+
for (int i = 0; i < 5; ++i) {
23+
auto start = std::chrono::high_resolution_clock::now();
24+
q.submit([&](handler &cgh) {
25+
accessor acc{b, cgh};
26+
size_t temp_memory_size = WG_SIZE * sizeof(T);
27+
auto scratch = sycl::local_accessor<std::byte, 1>(temp_memory_size, cgh);
28+
29+
cgh.parallel_for(
30+
nd_range<1>{range<1>{GLOBAL_SIZE}, range<1>{WG_SIZE}},
31+
[=](nd_item<1> ndi) {
32+
auto g = ndi.get_group();
33+
auto sg = ndi.get_sub_group();
34+
// sg's scratch space starts at sg leader's *group* linear id.
35+
auto sg_scratch = sycl::span(
36+
&scratch[group_broadcast(sg, g.get_local_linear_id())],
37+
sizeof(T) * sg.get_local_linear_range());
38+
auto handle = sycl_exp::group_with_scratchpad(sg, sg_scratch);
39+
T val{0};
40+
auto binop = [](T x, T y) { return x + y; };
41+
for (int j = 0; j < 100000; ++j)
42+
val += sycl_exp::reduce_over_group(
43+
handle, static_cast<T>(j % 100), binop);
44+
acc[ndi.get_global_linear_id()] = val;
45+
});
46+
}).wait();
47+
if (i == 0)
48+
continue; // skip first iteration's overheads.
49+
auto end = std::chrono::high_resolution_clock::now();
50+
auto time =
51+
std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
52+
.count();
53+
std::cout << " " << std::setw(6) << time << "ms";
54+
}
55+
std::cout << std::endl;
56+
}
57+
58+
int main() {
59+
test<uint8_t>();
60+
test<uint16_t>();
61+
test<uint32_t>();
62+
test<uint64_t>();
63+
64+
test<int8_t>();
65+
test<int16_t>();
66+
test<int32_t>();
67+
test<int64_t>();
68+
69+
if (device{}.has(aspect::fp16))
70+
test<half>();
71+
test<float>();
72+
if (device{}.has(aspect::fp64))
73+
test<double>();
74+
75+
return 0;
76+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Tests governed by this config additionally require the 'enable-perf-tests'
# feature, so they are only run when that feature is available.
config.required_features += ['enable-perf-tests']

sycl/test-e2e/lit.cfg.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@
183183
config.ur_l0_leaks_debug = lit_config.params.get("ur_l0_leaks_debug")
184184
lit_config.note("UR_L0_LEAKS_DEBUG: " + config.ur_l0_leaks_debug)
185185

186+
# Performance tests are opt-in via "--param enable-perf-tests=<value>".
# Parameter values arrive as strings, so a plain truthiness check would treat
# "False" or "0" as enabled; explicitly recognise the usual "off" spellings.
if str(lit_config.params.get("enable-perf-tests", "")).strip().lower() not in (
    "",
    "0",
    "false",
    "off",
    "no",
):
    config.available_features.add("enable-perf-tests")
186188
# Make sure that any dynamic checks below are done in the build directory and
187189
# not where the sources are located. This is important for the in-tree
188190
# configuration (as opposite to the standalone one).

0 commit comments

Comments
 (0)