|
| 1 | +// REQUIRES: gpu, level_zero |
| 2 | + |
| 3 | +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out |
| 4 | +// RUN: env ZE_DEBUG=-1 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck --check-prefixes=WITHOUT %s |
| 5 | +// RUN: env ZE_DEBUG=-1 %GPU_RUN_PLACEHOLDER %t.out profile 2>&1 | FileCheck --check-prefixes=WITH %s |
| 6 | + |
| 7 | +// Test case adapted from the SYCL version of Rodinia benchmark hotspot. |
| 8 | + |
| 9 | +// clang-format off |
| 10 | +// Check the expected output when queue::enable_profiling is not specified |
| 11 | +// |
| 12 | +// WITHOUT: ze_event_pool_desc_t flags set to: 1 |
| 13 | +// WITHOUT: SYCL exception caught: Native API failed. Native API returns: -7 (CL_PROFILING_INFO_NOT_AVAILABLE) |
| 14 | + |
| 15 | +// Check the expected output when queue::enable_profiling is specified |
| 16 | +// |
| 17 | +// WITH: ze_event_pool_desc_t flags set to: 5 |
| 18 | +// WITH: Device kernel time: |
| 19 | +// clang-format on |
| 20 | +// |
| 21 | + |
| 22 | +#include <CL/sycl.hpp> |
| 23 | +using namespace cl::sycl; |
| 24 | + |
| 25 | +int foo(queue &q, int n) { |
| 26 | + for (int i = 0; i < n; i++) { |
| 27 | + |
| 28 | + sycl::event queue_event = q.submit([&](handler &cgh) { |
| 29 | + cgh.parallel_for<class empty>(range<2>(10000, 10000), |
| 30 | + [=](item<2> item) {}); |
| 31 | + }); |
| 32 | + |
| 33 | + q.wait(); |
| 34 | + |
| 35 | + // Get kernel computation time |
| 36 | + try { |
| 37 | + auto startk = queue_event.template get_profiling_info< |
| 38 | + cl::sycl::info::event_profiling::command_start>(); |
| 39 | + auto endk = queue_event.template get_profiling_info< |
| 40 | + cl::sycl::info::event_profiling::command_end>(); |
| 41 | + auto kernel_time = |
| 42 | + (float)(endk - startk) * 1e-9f; // to seconds, 1e-6f to milliseconds |
| 43 | + printf("Device kernel time: %.12fs\n", (float)kernel_time); |
| 44 | + |
| 45 | + } catch (const sycl::exception &e) { |
| 46 | + std::cout << "SYCL exception caught: " << e.what() << '\n'; |
| 47 | + return 0; |
| 48 | + } |
| 49 | + } |
| 50 | + return n; |
| 51 | +} |
| 52 | + |
| 53 | +int main(int argc, char **argv) { |
| 54 | + |
| 55 | + bool profiling = argc > 1; |
| 56 | + |
| 57 | + { |
| 58 | + gpu_selector dev_sel; |
| 59 | + property_list propList{}; |
| 60 | + if (profiling) |
| 61 | + propList = cl::sycl::property::queue::enable_profiling(); |
| 62 | + |
| 63 | + queue q(dev_sel, propList); |
| 64 | + // Perform the computation |
| 65 | + foo(q, 2); |
| 66 | + } // SYCL scope |
| 67 | + |
| 68 | + return 0; |
| 69 | +} |
0 commit comments