Skip to content

Commit fa8eb17

Browse files
Merge pull request #985 from IntelPython/add-SyclDevice-sub-group-sizes-property
Add sycl device sub group sizes property
2 parents 530a7d7 + 99f8a91 commit fa8eb17

File tree

10 files changed

+116
-0
lines changed

10 files changed

+116
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2222
* Added C-API to `dpctl.program.SyclKernel` and `dpctl.program.SyclProgram`. Added type casters for new types to "dpctl4pybind11" and added an example demonstrating its use [#970](https://github.com/IntelPython/dpctl/pull/970).
2323
* Introduced "dpctl/sycl.pxd" Cython declaration file to streamline use of SYCL functions from Cython, and added an example demonstrating its use [#981](https://github.com/IntelPython/dpctl/pull/981).
2424
* Added experimental support for sharing data allocated on sub-devices via dlpack [#984](https://github.com/IntelPython/dpctl/pull/984).
25+
* Added `dpctl.SyclDevice.sub_group_sizes` property to retrieve supported sizes of sub-group by the device [#985](https://github.com/IntelPython/dpctl/pull/985).
2526

2627
### Changed
2728
* Improved queue compatibility testing in `dpctl.tensor`'s implementation module [#900](https://github.com/IntelPython/dpctl/pull/900).

dpctl/_backend.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h":
204204
cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize(const DPCTLSyclDeviceRef DRef)
205205
cdef _global_mem_cache_type DPCTLDevice_GetGlobalMemCacheType(
206206
const DPCTLSyclDeviceRef DRef)
207+
cdef size_t *DPCTLDevice_GetSubGroupSizes(const DPCTLSyclDeviceRef DRef,
208+
size_t *res_len)
207209

208210

209211
cdef extern from "syclinterface/dpctl_sycl_device_manager.h":

dpctl/_sycl_device.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ from ._backend cimport ( # noqa: E211
6565
DPCTLDevice_GetPreferredVectorWidthShort,
6666
DPCTLDevice_GetProfilingTimerResolution,
6767
DPCTLDevice_GetSubGroupIndependentForwardProgress,
68+
DPCTLDevice_GetSubGroupSizes,
6869
DPCTLDevice_GetVendor,
6970
DPCTLDevice_HasAspect,
7071
DPCTLDevice_Hash,
@@ -884,6 +885,28 @@ cdef class SyclDevice(_SyclDevice):
884885
self._device_ref
885886
)
886887

888+
@property
889+
def sub_group_sizes(self):
890+
""" Returns list of supported sub-group sizes for this device.
891+
892+
Returns:
893+
List[int]: List of supported sub-group sizes.
894+
"""
895+
cdef size_t *sg_sizes = NULL
896+
cdef size_t sg_sizes_len = 0
897+
cdef size_t i
898+
899+
sg_sizes = DPCTLDevice_GetSubGroupSizes(
900+
self._device_ref, &sg_sizes_len)
901+
if (sg_sizes is not NULL and sg_sizes_len > 0):
902+
res = list()
903+
for i in range(sg_sizes_len):
904+
res.append(sg_sizes[i])
905+
DPCTLSize_t_Array_Delete(sg_sizes)
906+
return res
907+
else:
908+
return []
909+
887910
@property
888911
def sycl_platform(self):
889912
""" Returns the platform associated with this device.

dpctl/tests/_device_attributes_checks.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ def check_max_num_sub_groups(device):
115115
assert max_num_sub_groups > 0
116116

117117

118+
def check_sub_group_sizes(device):
119+
sg_sizes = device.sub_group_sizes
120+
assert all(el > 0 for el in sg_sizes)
121+
122+
118123
def check_has_aspect_host(device):
119124
try:
120125
device.has_aspect_host
@@ -605,6 +610,7 @@ def check_global_mem_cache_line_size(device):
605610
check_max_work_item_sizes,
606611
check_max_work_group_size,
607612
check_max_num_sub_groups,
613+
check_sub_group_sizes,
608614
check_is_accelerator,
609615
check_is_cpu,
610616
check_is_gpu,

examples/pybind11/use_dpctl_syclqueue/tests/test_queue_device.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,11 @@ def test_offload_array_mod():
5555
Ynp = X % modulus_p
5656

5757
assert np.array_equal(Y, Ynp)
58+
59+
60+
def test_get_sub_group_sizes():
61+
d = dpctl.SyclDevice()
62+
szs = uqd.get_sub_group_sizes(d)
63+
assert type(szs) is list
64+
assert all(type(el) is int for el in szs)
65+
szs == d.sub_group_sizes

examples/pybind11/use_dpctl_syclqueue/use_queue_device/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
get_device_global_mem_size,
2121
get_device_local_mem_size,
2222
get_max_compute_units,
23+
get_sub_group_sizes,
2324
offloaded_array_mod,
2425
)
2526

@@ -28,6 +29,7 @@
2829
"get_device_global_mem_size",
2930
"get_device_local_mem_size",
3031
"offloaded_array_mod",
32+
"get_sub_group_sizes",
3133
]
3234

3335
__doc__ = """

examples/pybind11/use_dpctl_syclqueue/use_queue_device/_example.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <cstdint>
3232
#include <pybind11/numpy.h>
3333
#include <pybind11/pybind11.h>
34+
#include <pybind11/stl.h>
3435

3536
namespace py = pybind11;
3637

@@ -84,6 +85,11 @@ offloaded_array_mod(sycl::queue q,
8485
return res;
8586
}
8687

88+
std::vector<std::size_t> get_sub_group_sizes(const sycl::device &d)
89+
{
90+
return d.get_info<sycl::info::device::sub_group_sizes>();
91+
}
92+
8793
PYBIND11_MODULE(_use_queue_device, m)
8894
{
8995
m.def(
@@ -100,4 +106,6 @@ PYBIND11_MODULE(_use_queue_device, m)
100106
"Computes amount of local memory of the given dpctl.SyclDevice");
101107
m.def("offloaded_array_mod", &offloaded_array_mod,
102108
"Compute offloaded modular reduction of integer-valued NumPy array");
109+
m.def("get_sub_group_sizes", &get_sub_group_sizes,
110+
"Gets info::device::sub_group_sizes property of given device");
103111
}

libsyclinterface/include/dpctl_sycl_device_interface.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,4 +651,17 @@ DPCTL_API
651651
DPCTLGlobalMemCacheType
652652
DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef);
653653

654+
/*!
655+
* @brief Wrapper for get_info<info::device::sub_group_sizes>().
656+
*
657+
* @param DRef Opaque pointer to a ``sycl::device``
658+
* @param res_len Populated with size of the returned array
659+
* @return Returns the valid result if device exists else returns NULL.
660+
* @ingroup DeviceInterface
661+
*/
662+
DPCTL_API
663+
__dpctl_keep size_t *
664+
DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef,
665+
size_t *res_len);
666+
654667
DPCTL_C_EXTERN_C_END

libsyclinterface/source/dpctl_sycl_device_interface.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,3 +743,30 @@ DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef)
743743
return DPCTL_MEM_CACHE_TYPE_INDETERMINATE;
744744
}
745745
}
746+
747+
__dpctl_keep size_t *
748+
DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef,
749+
size_t *res_len)
750+
{
751+
size_t *sizes = nullptr;
752+
std::vector<size_t> sg_sizes;
753+
*res_len = 0;
754+
auto D = unwrap<device>(DRef);
755+
if (D) {
756+
try {
757+
sg_sizes = D->get_info<info::device::sub_group_sizes>();
758+
*res_len = sg_sizes.size();
759+
} catch (std::exception const &e) {
760+
error_handler(e, __FILE__, __func__, __LINE__);
761+
}
762+
try {
763+
sizes = new size_t[sg_sizes.size()];
764+
} catch (std::exception const &e) {
765+
error_handler(e, __FILE__, __func__, __LINE__);
766+
}
767+
for (auto i = 0ul; (sizes != nullptr) && i < sg_sizes.size(); ++i) {
768+
sizes[i] = sg_sizes[i];
769+
}
770+
}
771+
return sizes;
772+
}

libsyclinterface/tests/test_sycl_device_interface.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,22 @@ TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxNumSubGroups)
205205
EXPECT_TRUE(n > 0);
206206
}
207207

208+
TEST_P(TestDPCTLSyclDeviceInterface, ChkGetSubGroupSizes)
209+
{
210+
size_t sg_sizes_len = 0;
211+
size_t *sg_sizes = nullptr;
212+
EXPECT_NO_FATAL_FAILURE(
213+
sg_sizes = DPCTLDevice_GetSubGroupSizes(DRef, &sg_sizes_len));
214+
if (DPCTLDevice_IsAccelerator(DRef))
215+
EXPECT_TRUE(sg_sizes_len >= 0);
216+
else
217+
EXPECT_TRUE(sg_sizes_len > 0);
218+
for (size_t i = 0; i < sg_sizes_len; ++i) {
219+
EXPECT_TRUE(sg_sizes > 0);
220+
}
221+
EXPECT_NO_FATAL_FAILURE(DPCTLSize_t_Array_Delete(sg_sizes));
222+
}
223+
208224
TEST_P(TestDPCTLSyclDeviceInterface, ChkGetPlatform)
209225
{
210226
DPCTLSyclPlatformRef PRef = nullptr;
@@ -751,3 +767,13 @@ TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetGlobalMemCacheType)
751767
EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheType(Null_DRef));
752768
ASSERT_TRUE(res == DPCTL_MEM_CACHE_TYPE_INDETERMINATE);
753769
}
770+
771+
TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetSubGroupSizes)
772+
{
773+
size_t *sg_sizes = nullptr;
774+
size_t sg_sizes_len = 0;
775+
EXPECT_NO_FATAL_FAILURE(
776+
sg_sizes = DPCTLDevice_GetSubGroupSizes(Null_DRef, &sg_sizes_len));
777+
ASSERT_TRUE(sg_sizes == nullptr);
778+
ASSERT_TRUE(sg_sizes_len == 0);
779+
}

0 commit comments

Comments
 (0)