Skip to content

Commit f73a1c4

Browse files
Implement Python API for ext_intel_device_info descriptors
https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_intel_device_info.md This includes HW characteristics for Intel Level-Zero GPU devices as well as access to PCI device-identifier.
1 parent 049cd77 commit f73a1c4

File tree

3 files changed

+233
-0
lines changed

3 files changed

+233
-0
lines changed

dpctl/utils/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,24 @@ foreach(_cy_file ${_cython_sources})
44
get_filename_component(_trgt ${_cy_file} NAME_WLE)
55
build_dpctl_ext(${_trgt} ${_cy_file} "dpctl/utils")
66
endforeach()
7+
8+
add_custom_target(_dpctl4pybind11_header_ready
9+
DEPENDS
10+
_usmarray_copy_capi_include
11+
_memory_copy_capi_include
12+
_sycl_device_copy_capi_include
13+
_sycl_queue_copy_capi_include
14+
_sycl_context_copy_capi_include
15+
_sycl_event_copy_capi_include
16+
)
17+
18+
set(python_module_name _device_queries)
19+
pybind11_add_module(${python_module_name} MODULE
20+
${CMAKE_CURRENT_SOURCE_DIR}/src/device_queries.cpp
21+
)
22+
target_include_directories(${python_module_name}
23+
PRIVATE
24+
${CMAKE_CURRENT_SOURCE_DIR}/../include
25+
)
26+
add_dependencies(${python_module_name} _dpctl4pybind11_header_ready)
27+
install(TARGETS ${python_module_name} DESTINATION "dpctl/utils")

dpctl/utils/__init__.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,87 @@
1818
A collection of utility functions.
1919
"""
2020

21+
from .._sycl_device import SyclDevice
2122
from ._compute_follows_data import (
2223
ExecutionPlacementError,
2324
get_coerced_usm_type,
2425
get_execution_queue,
2526
validate_usm_type,
2627
)
28+
from ._device_queries import (
29+
intel_device_info_device_id,
30+
intel_device_info_gpu_eu_count,
31+
intel_device_info_gpu_eu_count_per_subslice,
32+
intel_device_info_gpu_eu_simd_width,
33+
intel_device_info_gpu_hw_threads_per_eu,
34+
intel_device_info_gpu_slices,
35+
intel_device_info_gpu_subslices_per_slice,
36+
intel_device_info_max_mem_bandwidth,
37+
)
2738
from ._onetrace_context import onetrace_enabled
2839

40+
41+
def intel_device_info(dev):
42+
"""intel_device_info(sycl_device)
43+
44+
For Intel(R) GPU devices returns a dictionary
45+
with device architectural details, and an empty
46+
dictionary otherwise. The dictionary contains
47+
the following keys:
48+
49+
device_id: 32-bits device PCI identifier
50+
gpu_eu_count: Total number of execution units
51+
gpu_hw_threads_per_eu: Number of thread contexts in EU
52+
gpu_eu_simd_width: Physical SIMD width of EU
53+
gpu_slices: Total number of slices
54+
gpu_subslices_per_slice: Number of sub-slices per slice
55+
gpu_eu_count_per_subslice: Number of EUs in subslice
56+
max_mem_bandwidth: Maximum memory bandwidth in bytes/second
57+
58+
Unsupported descriptors are omitted from the dictionary.
59+
Descriptors other than PCI identifier are supported only for
60+
SyclDevices with Leve-Zero backend.
61+
"""
62+
if not isinstance(dev, SyclDevice):
63+
raise TypeError(f"Expected dpctl.SyclDevice, got {type(dev)}")
64+
dev_id = intel_device_info_device_id(dev)
65+
if dev_id:
66+
res = {
67+
"device_id": dev_id,
68+
}
69+
if dev.has_aspect_gpu:
70+
eu_count = intel_device_info_gpu_eu_count(dev)
71+
if eu_count:
72+
res["gpu_eu_count"] = eu_count
73+
hw_threads = intel_device_info_gpu_hw_threads_per_eu(dev)
74+
if hw_threads:
75+
res["gpu_hw_threads_per_eu"] = hw_threads
76+
simd_w = intel_device_info_gpu_eu_simd_width(dev)
77+
if simd_w:
78+
res["gpu_eu_simd_width"] = simd_w
79+
n_slices = intel_device_info_gpu_slices(dev)
80+
if n_slices:
81+
res["gpu_slices"] = n_slices
82+
n_subslices = intel_device_info_gpu_subslices_per_slice(dev)
83+
if n_subslices:
84+
res["gpu_subslices_per_slice"] = n_subslices
85+
n_eu_per_subslice = intel_device_info_gpu_eu_count_per_subslice(dev)
86+
if n_eu_per_subslice:
87+
res["gpu_eu_count_per_subslice"] = n_eu_per_subslice
88+
bw = intel_device_info_max_mem_bandwidth(dev)
89+
if bw:
90+
res["max_mem_bandwidth"] = bw
91+
return res
92+
return dict()
93+
94+
95+
def _is_gen9(dev):
96+
if not isinstance(dev, SyclDevice):
97+
raise TypeError(f"Expected dpctl.SyclDevice, got {type(dev)}")
98+
dev_id = intel_device_info_device_id(dev)
99+
return (dev_id & 0xFF00) == 0x3E00
100+
101+
29102
__all__ = [
30103
"get_execution_queue",
31104
"get_coerced_usm_type",

dpctl/utils/src/device_queries.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#include "dpctl4pybind11.hpp"
2+
#include <pybind11/pybind11.h>
3+
#include <pybind11/stl.h>
4+
#include <sycl/sycl.hpp>
5+
6+
#include <cstddef>
7+
#include <cstdint>
8+
9+
namespace
10+
{
11+
12+
std::uint32_t py_intel_device_id(const sycl::device &d)
13+
{
14+
static constexpr std::uint32_t device_id_unavailable = 0;
15+
16+
if (d.has(sycl::aspect::ext_intel_device_id)) {
17+
return d.get_info<sycl::ext::intel::info::device::device_id>();
18+
}
19+
20+
return device_id_unavailable;
21+
}
22+
23+
std::uint32_t py_intel_gpu_eu_count(const sycl::device &d)
24+
{
25+
static constexpr std::uint32_t eu_count_unavailable = 0;
26+
27+
if (d.has(sycl::aspect::ext_intel_gpu_eu_count)) {
28+
return d.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
29+
}
30+
31+
return eu_count_unavailable;
32+
}
33+
34+
std::uint32_t py_intel_gpu_hw_threads_per_eu(const sycl::device &d)
35+
{
36+
static constexpr std::uint32_t thread_count_unavailable = 0;
37+
38+
if (d.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
39+
return d
40+
.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
41+
}
42+
43+
return thread_count_unavailable;
44+
}
45+
46+
std::uint32_t py_intel_gpu_eu_simd_width(const sycl::device &d)
47+
{
48+
static constexpr std::uint32_t width_unavailable = 0;
49+
50+
if (d.has(sycl::aspect::ext_intel_gpu_eu_simd_width)) {
51+
return d.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>();
52+
}
53+
54+
return width_unavailable;
55+
}
56+
57+
std::uint32_t py_intel_gpu_slices(const sycl::device &d)
58+
{
59+
static constexpr std::uint32_t count_unavailable = 0;
60+
61+
if (d.has(sycl::aspect::ext_intel_gpu_slices)) {
62+
return d.get_info<sycl::ext::intel::info::device::gpu_slices>();
63+
}
64+
65+
return count_unavailable;
66+
}
67+
68+
std::uint32_t py_intel_gpu_subslices_per_slice(const sycl::device &d)
69+
{
70+
static constexpr std::uint32_t count_unavailable = 0;
71+
72+
if (d.has(sycl::aspect::ext_intel_gpu_subslices_per_slice)) {
73+
return d.get_info<
74+
sycl::ext::intel::info::device::gpu_subslices_per_slice>();
75+
}
76+
77+
return count_unavailable;
78+
}
79+
80+
std::uint32_t py_intel_gpu_eu_count_per_subslice(const sycl::device &d)
81+
{
82+
static constexpr std::uint32_t count_unavailable = 0;
83+
84+
if (d.has(sycl::aspect::ext_intel_gpu_eu_count_per_subslice)) {
85+
return d.get_info<
86+
sycl::ext::intel::info::device::gpu_eu_count_per_subslice>();
87+
}
88+
89+
return count_unavailable;
90+
}
91+
92+
std::uint64_t py_intel_max_mem_bandwidth(const sycl::device &d)
93+
{
94+
static constexpr std::uint64_t bandwidth_unavailable = 0;
95+
96+
if (d.has(sycl::aspect::ext_intel_max_mem_bandwidth)) {
97+
return d.get_info<sycl::ext::intel::info::device::max_mem_bandwidth>();
98+
}
99+
100+
return bandwidth_unavailable;
101+
}
102+
103+
}; // namespace
104+
105+
PYBIND11_MODULE(_device_queries, m)
106+
{
107+
m.def("intel_device_info_device_id", &py_intel_device_id,
108+
"Get ext_intel_device_id for the device, zero if not an intel device",
109+
py::arg("device"));
110+
111+
m.def("intel_device_info_gpu_eu_count", &py_intel_gpu_eu_count,
112+
"Returns the number of execution units (EUs) associated with the "
113+
"Intel GPU.",
114+
py::arg("device"));
115+
116+
m.def("intel_device_info_gpu_hw_threads_per_eu",
117+
&py_intel_gpu_hw_threads_per_eu,
118+
"Returns the number of hardware threads in EU.", py::arg("device"));
119+
120+
m.def("intel_device_info_gpu_eu_simd_width", &py_intel_gpu_eu_simd_width,
121+
"Returns the physical SIMD width of the execution unit (EU).",
122+
py::arg("device"));
123+
124+
m.def("intel_device_info_gpu_slices", &py_intel_gpu_slices,
125+
"Returns the number of slices in the GPU device, or zero.",
126+
py::arg("device"));
127+
128+
m.def("intel_device_info_gpu_subslices_per_slice",
129+
&py_intel_gpu_subslices_per_slice,
130+
"Returns the number of subslices per slice.", py::arg("device"));
131+
132+
m.def("intel_device_info_gpu_eu_count_per_subslice",
133+
&py_intel_gpu_eu_count_per_subslice,
134+
"Returns the number of EUs per subslice of GPU.", py::arg("device"));
135+
136+
m.def("intel_device_info_max_mem_bandwidth", &py_intel_max_mem_bandwidth,
137+
"Returns the maximum memory bandwidth in units of bytes/second.",
138+
py::arg("device"));
139+
}

0 commit comments

Comments
 (0)