Skip to content

Commit ef4b1ef

Browse files
OuadiElfarouki authored and
arthw committed
[SYCL] Updated SYCL device filtering (ggml-org#8901)
* Updated device filter to depend on default_selector (fixes non-intel device issues) * Small related update to example/sycl Readme
1 parent c77d446 commit ef4b1ef

File tree

2 files changed

+82
-15
lines changed

2 files changed

+82
-15
lines changed

examples/sycl/README.md

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ This example program provides the tools for llama.cpp for SYCL on Intel GPU.
1212

1313
List all SYCL devices with ID, compute capability, max work group size, etc.
1414

15-
1. Build the llama.cpp for SYCL for all targets.
15+
1. Build the llama.cpp for SYCL for the specified target *(using GGML_SYCL_TARGET)*.
1616

17-
2. Enable oneAPI running environment
17+
2. Enable the oneAPI runtime environment *(only needed when GGML_SYCL_TARGET is set to INTEL, which is the default)*
1818

1919
```
2020
source /opt/intel/oneapi/setvars.sh
@@ -29,19 +29,13 @@ source /opt/intel/oneapi/setvars.sh
2929
Check the device ID in the startup log, for example:
3030

3131
```
32-
found 4 SYCL devices:
33-
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
34-
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
35-
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
36-
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
37-
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
38-
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
39-
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
40-
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
32+
found 2 SYCL devices:
33+
| | | | |Max | |Max |Global | |
34+
| | | | |compute|Max work|sub |mem | |
35+
|ID| Device Type| Name|Version|units |group |group|size | Driver version|
36+
|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|
37+
| 0| [level_zero:gpu:0]| Intel Arc A770 Graphics| 1.3| 512| 1024| 32| 16225M| 1.3.29138|
38+
| 1| [level_zero:gpu:1]| Intel UHD Graphics 750| 1.3| 32| 512| 32| 62631M| 1.3.29138|
4139
4240
```
4341

44-
|Attribute|Note|
45-
|-|-|
46-
|compute capability 1.3|Level-zero running time, recommended |
47-
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|

ggml/src/ggml-sycl/dpct.hpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,79 @@ namespace dpct
886886
return -1;
887887
}
888888
889+
inline std::string get_preferred_gpu_platform_name() {
890+
std::string result;
891+
892+
std::string filter = "";
893+
char* env = getenv("ONEAPI_DEVICE_SELECTOR");
894+
if (env) {
895+
if (std::strstr(env, "level_zero")) {
896+
filter = "level-zero";
897+
}
898+
else if (std::strstr(env, "opencl")) {
899+
filter = "opencl";
900+
}
901+
else if (std::strstr(env, "cuda")) {
902+
filter = "cuda";
903+
}
904+
else if (std::strstr(env, "hip")) {
905+
filter = "hip";
906+
}
907+
else {
908+
throw std::runtime_error("invalid device filter: " + std::string(env));
909+
}
910+
} else {
911+
auto default_device = sycl::device(sycl::default_selector_v);
912+
auto default_platform_name = default_device.get_platform().get_info<sycl::info::platform::name>();
913+
914+
if (std::strstr(default_platform_name.c_str(), "Level-Zero") || default_device.is_cpu()) {
915+
filter = "level-zero";
916+
}
917+
else if (std::strstr(default_platform_name.c_str(), "CUDA")) {
918+
filter = "cuda";
919+
}
920+
else if (std::strstr(default_platform_name.c_str(), "HIP")) {
921+
filter = "hip";
922+
}
923+
}
924+
925+
auto platform_list = sycl::platform::get_platforms();
926+
927+
for (const auto& platform : platform_list) {
928+
auto devices = platform.get_devices();
929+
auto gpu_dev = std::find_if(devices.begin(), devices.end(), [](const sycl::device& d) {
930+
return d.is_gpu();
931+
});
932+
933+
if (gpu_dev == devices.end()) {
934+
// cout << "platform [" << platform_name
935+
// << "] does not contain GPU devices, skipping\n";
936+
continue;
937+
}
938+
939+
auto platform_name = platform.get_info<sycl::info::platform::name>();
940+
std::string platform_name_low_case;
941+
platform_name_low_case.resize(platform_name.size());
942+
943+
std::transform(
944+
platform_name.begin(), platform_name.end(), platform_name_low_case.begin(), ::tolower);
945+
946+
if (platform_name_low_case.find(filter) == std::string::npos) {
947+
// cout << "platform [" << platform_name
948+
// << "] does not match with requested "
949+
// << filter << ", skipping\n";
950+
continue;
951+
}
952+
953+
result = platform_name;
954+
}
955+
956+
if (result.empty())
957+
throw std::runtime_error("can not find preferred GPU platform");
958+
959+
return result;
960+
}
961+
889962
template <class DeviceSelector>
890963
std::enable_if_t<
891964
std::is_invocable_r_v<int, DeviceSelector, const sycl::device &>>

0 commit comments

Comments
 (0)