Skip to content

[SYCL][E2E] Introduce performance tests #12372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion .github/workflows/sycl_post_commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,33 @@ jobs:
- name: Intel Arc A-Series Graphics with Level Zero
runner: '["Linux", "arc"]'
extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True
# Performance tests below. Specifics:
# - only run performance tests (use LIT_FILTER env)
# - ask llvm-lit to show all the output, even for PASS (-a)
# - run in single thread (-j 1)
# - enable the tests in LIT (--param enable-perf-tests=True)
# - run on all available devices.
- name: Perf tests on Intel GEN12 Graphics system
runner: '["Linux", "gen12"]'
env: '{"LIT_FILTER":"PerformanceTests/"}'
extra_lit_opts: -a -j 1 --param enable-perf-tests=True
target_devices: all
- name: Perf tests on Intel Arc A-Series Graphics system
runner: '["Linux", "arc"]'
env: '{"LIT_FILTER":"PerformanceTests/"}'
extra_lit_opts: -a -j 1 --param enable-perf-tests=True
target_devices: all
uses: ./.github/workflows/sycl_linux_run_tests.yml
with:
name: ${{ matrix.name }}
runner: ${{ matrix. runner }}
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_options: -u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN
target_devices: ext_oneapi_level_zero:gpu
target_devices: ${{ matrix.target_devices || 'ext_oneapi_level_zero:gpu' }}
reset_gpu: true

extra_lit_opts: ${{ matrix.extra_lit_opts }}
env: ${{ matrix.env || '{}' }}

ref: ${{ github.sha }}
merge_ref: ''
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <sycl/sycl.hpp>

#include <sycl/ext/oneapi/experimental/user_defined_reductions.hpp>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>

using namespace sycl;
namespace sycl_exp = sycl::ext::oneapi::experimental;

/// Benchmarks sycl_exp::reduce_over_group over a sub-group for element type T.
///
/// Submits the same kernel five times to a default-constructed queue. Each
/// kernel runs a single work-group of WG_SIZE work-items; every work-item
/// performs 100000 sub-group reductions with a user-defined binary operation
/// and stores its accumulated value into a buffer.
///
/// Prints one line to stdout: the function signature followed by the
/// wall-clock time (submit + wait, in milliseconds) of iterations 1..4. The
/// first iteration is treated as warm-up and is not reported.
template <typename T> void test() {
  std::cout << std::setw(50) << __PRETTY_FUNCTION__ << ", time:";
  constexpr int WG_SIZE = 32 + 16 + 8 + 4;
  constexpr int GLOBAL_SIZE = WG_SIZE * 1;

  queue q;

  buffer<T, 1> b{GLOBAL_SIZE};

  for (int i = 0; i < 5; ++i) {
    // steady_clock is monotonic; high_resolution_clock may alias system_clock
    // and is therefore not reliable for measuring intervals.
    const auto start = std::chrono::steady_clock::now();
    q.submit([&](handler &cgh) {
       accessor acc{b, cgh};
       // One T-sized scratch slot per work-item of the work-group.
       size_t temp_memory_size = WG_SIZE * sizeof(T);
       auto scratch =
           sycl::local_accessor<std::byte, 1>(temp_memory_size, cgh);

       cgh.parallel_for(
           nd_range<1>{range<1>{GLOBAL_SIZE}, range<1>{WG_SIZE}},
           [=](nd_item<1> ndi) {
             auto g = ndi.get_group();
             auto sg = ndi.get_sub_group();
             // The sub-group's scratch space starts at the slot of the
             // sub-group leader's *group* linear id. `scratch` is indexed in
             // bytes, so the slot index has to be scaled by sizeof(T);
             // otherwise the regions of different sub-groups overlap whenever
             // sizeof(T) > 1.
             const size_t sg_first_slot =
                 group_broadcast(sg, g.get_local_linear_id());
             auto sg_scratch =
                 sycl::span(&scratch[sizeof(T) * sg_first_slot],
                            sizeof(T) * sg.get_local_linear_range());
             auto handle = sycl_exp::group_with_scratchpad(sg, sg_scratch);
             T val{0};
             auto binop = [](T x, T y) { return x + y; };
             for (int j = 0; j < 100000; ++j)
               val += sycl_exp::reduce_over_group(
                   handle, static_cast<T>(j % 100), binop);
             // Store the result so the reduction loop has an observable
             // effect.
             acc[ndi.get_global_linear_id()] = val;
           });
     }).wait();
    if (i == 0)
      continue; // skip first iteration's overheads.
    const auto end = std::chrono::steady_clock::now();
    const auto time =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
            .count();
    std::cout << " " << std::setw(6) << time << "ms";
  }
  std::cout << std::endl;
}

int main() {
test<uint8_t>();
test<uint16_t>();
test<uint32_t>();
test<uint64_t>();

test<int8_t>();
test<int16_t>();
test<int32_t>();
test<int64_t>();

if (device{}.has(aspect::fp16))
test<half>();
test<float>();
if (device{}.has(aspect::fp64))
test<double>();

return 0;
}
1 change: 1 addition & 0 deletions sycl/test-e2e/PerformanceTests/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Every test in this directory requires the 'enable-perf-tests' feature, so
# the tests are only run when that feature has been made available.
config.required_features += ['enable-perf-tests']
2 changes: 2 additions & 0 deletions sycl/test-e2e/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@
config.ur_l0_leaks_debug = lit_config.params.get("ur_l0_leaks_debug")
lit_config.note("UR_L0_LEAKS_DEBUG: " + config.ur_l0_leaks_debug)

# Performance tests are opt-in via `--param enable-perf-tests=<value>`.
# Parameter values arrive as strings, so a plain truthiness check would treat
# "False" or "0" as enabled; explicitly recognise the negative spellings.
perf_tests_param = str(lit_config.params.get("enable-perf-tests", "")).strip().lower()
if perf_tests_param not in ("", "0", "false", "off", "no"):
    config.available_features.add("enable-perf-tests")
# Make sure that any dynamic checks below are done in the build directory and
# not where the sources are located. This is important for the in-tree
# configuration (as opposed to the standalone one).
Expand Down