Skip to content

Commit 8acdacb

Browse files
authored
opencl: use OpenCL C standard supported by the device (#12221)
This patch adjusts llama.cpp so that it can run on PoCL, which does not support OpenCL C 2.0 (`-cl-std=CL2.0`). The issue is solved by querying the device for the OpenCL C versions it supports and compiling the kernels with the highest one available.
1 parent 89b2b56 commit 8acdacb

File tree

3 files changed

+136
-43
lines changed

3 files changed

+136
-43
lines changed

ggml/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ option(GGML_OPENCL "ggml: use OpenCL"
195195
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
196196
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
197197
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
198+
# OpenCL API version the backend is built against; passed to the OpenCL sources as a compile definition
# of the same name.
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
                                            "ggml: OpenCL API version to target")
198200

199201
# toolchain for vulkan-shaders-gen
200202
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")

ggml/src/ggml-opencl/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ if (GGML_OPENCL_PROFILING)
1515
endif ()
1616

1717
add_compile_definitions(GGML_OPENCL_SOA_Q)
18+
add_compile_definitions(GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})
1819

1920
if (GGML_OPENCL_USE_ADRENO_KERNELS)
2021
message(STATUS "OpenCL will use matmul kernels optimized for Adreno")

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 133 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#define CL_TARGET_OPENCL_VERSION 220
1+
#define CL_TARGET_OPENCL_VERSION GGML_OPENCL_TARGET_VERSION
22
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
33

44
// suppress warnings in CL headers for GCC and Clang
@@ -25,6 +25,8 @@
2525
#include <vector>
2626
#include <string>
2727
#include <cmath>
28+
#include <memory>
29+
#include <charconv>
2830

2931
#undef MIN
3032
#undef MAX
@@ -62,6 +64,97 @@ enum ADRENO_GPU_GEN {
6264
X1E,
6365
};
6466

67+
struct ggml_cl_version {
68+
cl_uint major = 0;
69+
cl_uint minor = 0;
70+
};
71+
72+
// Parses a version string of form "XX.YY ". On an error returns ggml_cl_version with all zeroes.
73+
static ggml_cl_version parse_cl_version(std::string_view str) {
74+
size_t major_str_begin = 0;
75+
size_t major_str_end = str.find(".", major_str_begin);
76+
if (major_str_end == std::string::npos) {
77+
return {};
78+
}
79+
80+
size_t minor_str_begin = major_str_end + 1;
81+
size_t minor_str_end = str.find(" ", minor_str_begin);
82+
if (minor_str_end == std::string::npos) {
83+
return {};
84+
}
85+
86+
cl_uint version_major;
87+
if (std::from_chars(str.data() + major_str_begin, str.data() + major_str_end, version_major).ec != std::errc{}) {
88+
return {};
89+
}
90+
91+
cl_uint version_minor;
92+
if (std::from_chars(str.data() + minor_str_begin, str.data() + minor_str_end, version_minor).ec != std::errc{}) {
93+
return {};
94+
}
95+
return { version_major, version_minor };
96+
}
97+
98+
// Returns OpenCL platform's version. On an error returns ggml_cl_version with all zeroes.
99+
static ggml_cl_version get_opencl_platform_version(cl_platform_id platform) {
100+
size_t param_size;
101+
CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, nullptr, &param_size));
102+
std::unique_ptr<char[]> param_storage(new char[param_size]);
103+
CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, param_size, param_storage.get(), nullptr));
104+
105+
auto param_value = std::string_view(param_storage.get(), param_size);
106+
const std::string version_prefix = "OpenCL "; // Suffix: "XX.YY <platform-specific-info>"
107+
if (param_value.find(version_prefix) != 0) {
108+
return {};
109+
}
110+
param_value.remove_prefix(version_prefix.length());
111+
return parse_cl_version(param_value);
112+
}
113+
114+
// Return a version to use in OpenCL C compilation. On an error returns ggml_cl_version with all zeroes.
115+
static ggml_cl_version get_opencl_c_version(ggml_cl_version platform_version, cl_device_id device) {
116+
size_t param_size;
117+
118+
#if CL_TARGET_OPENCL_VERSION >= 300
119+
if (platform_version.major >= 3) {
120+
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr, &param_size));
121+
if (!param_size) {
122+
return {};
123+
}
124+
125+
std::unique_ptr<cl_name_version[]> versions(new cl_name_version[param_size]);
126+
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, param_size, versions.get(), nullptr));
127+
unsigned versions_count = param_size / sizeof(cl_name_version);
128+
129+
cl_version version_max = 0;
130+
for (unsigned i = 0; i < versions_count; i++) {
131+
version_max = std::max<cl_version>(versions[i].version, version_max);
132+
}
133+
134+
return { CL_VERSION_MAJOR(version_max), CL_VERSION_MINOR(version_max) };
135+
}
136+
#else
137+
GGML_UNUSED(platform_version);
138+
#endif // CL_TARGET_OPENCL_VERSION >= 300
139+
140+
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr, &param_size));
141+
if (!param_size) {
142+
return {};
143+
}
144+
145+
std::unique_ptr<char[]> param_storage(new char[param_size]);
146+
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, param_size, param_storage.get(), nullptr));
147+
auto param_value = std::string_view(param_storage.get(), param_size);
148+
149+
const std::string version_prefix = "OpenCL C "; // Suffix: "XX.YY <platform-specific-info>"
150+
if (param_value.find(version_prefix) != 0) {
151+
return {};
152+
}
153+
param_value.remove_prefix(version_prefix.length());
154+
155+
return parse_cl_version(param_value);
156+
}
157+
65158
static ADRENO_GPU_GEN get_adreno_gpu_gen(const char *device_name) {
66159
if (strstr(device_name, "730") ||
67160
strstr(device_name, "740") ||
@@ -470,16 +563,11 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
470563
// A local ref of cl_device_id for convenience
471564
cl_device_id device = backend_ctx->device;
472565

566+
ggml_cl_version platform_version = get_opencl_platform_version(default_device->platform->id);
567+
473568
// Check device OpenCL version, OpenCL 2.0 or above is required
474-
size_t device_ver_str_size;
475-
clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &device_ver_str_size);
476-
char *device_ver_buffer = (char *)alloca(device_ver_str_size + 1);
477-
clGetDeviceInfo(device, CL_DEVICE_VERSION, device_ver_str_size, device_ver_buffer, NULL);
478-
device_ver_buffer[device_ver_str_size] = '\0';
479-
GGML_LOG_INFO("ggml_opencl: device OpenCL version: %s\n", device_ver_buffer);
480-
481-
if (strstr(device_ver_buffer, "OpenCL 2") == NULL &&
482-
strstr(device_ver_buffer, "OpenCL 3") == NULL) {
569+
ggml_cl_version opencl_c_version = get_opencl_c_version(platform_version, device);
570+
if (opencl_c_version.major < 2) {
483571
GGML_LOG_ERROR("ggml_opencl: OpenCL 2.0 or above is required\n");
484572
return backend_ctx;
485573
}
@@ -516,8 +604,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
516604

517605
// If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
518606
// optional in OpenCL 3.0 (cl_khr_subgroup is mandatory in OpenCL 2.x)
519-
if (strstr(device_ver_buffer, "OpenCL 3") &&
520-
strstr(ext_buffer, "cl_khr_subgroups") == NULL &&
607+
if (opencl_c_version.major == 3 && strstr(ext_buffer, "cl_khr_subgroups") == NULL &&
521608
strstr(ext_buffer, "cl_intel_subgroups") == NULL) {
522609
GGML_LOG_ERROR("ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) "
523610
"(note that subgroups is an optional feature in OpenCL 3.0)\n");
@@ -581,9 +668,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
581668
const std::string kernel_src = read_file("ggml-opencl.cl");
582669
#endif
583670

584-
std::string compile_opts =
585-
"-cl-std=CL2.0 -cl-mad-enable -cl-unsafe-math-optimizations "
586-
"-cl-finite-math-only -cl-fast-relaxed-math ";
671+
auto opencl_c_std =
672+
std::string("CL") + std::to_string(opencl_c_version.major) + "." + std::to_string(opencl_c_version.minor);
673+
674+
std::string compile_opts = std::string("-cl-std=") + opencl_c_std +
675+
" -cl-mad-enable -cl-unsafe-math-optimizations"
676+
" -cl-finite-math-only -cl-fast-relaxed-math";
587677
backend_ctx->program = build_program_from_source(context, device, kernel_src.c_str(), compile_opts);
588678

589679
// Non matmul kernels.
@@ -693,10 +783,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
693783
CL_CHECK((backend_ctx->kernel_transpose_16 = clCreateKernel(backend_ctx->program_transpose_16, "kernel_transpose_16", &err), err));
694784

695785
// Gemv general
696-
std::string CL_gemv_compile_opts =
697-
" -cl-std=CL2.0 "
698-
" -cl-mad-enable "
699-
" -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
786+
std::string CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
787+
" -cl-mad-enable "
788+
" -DSIMDGROUP_WIDTH=" +
789+
std::to_string(backend_ctx->adreno_wave_size);
700790
if (has_vector_subgroup_broadcast) {
701791
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
702792
}
@@ -713,12 +803,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
713803
CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_general = clCreateKernel(backend_ctx->program_CL_gemv_general, "kernel_gemv_noshuffle", &err), err));
714804

715805
// Gemv 2048, 16384
716-
CL_gemv_compile_opts =
717-
" -cl-std=CL2.0 "
718-
" -cl-mad-enable "
719-
" -DLINE_STRIDE_A=2048 "
720-
" -DBLOCK_STRIDE_A=16384 "
721-
" -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
806+
CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
807+
" -cl-mad-enable "
808+
" -DLINE_STRIDE_A=2048 "
809+
" -DBLOCK_STRIDE_A=16384 "
810+
" -DSIMDGROUP_WIDTH=" +
811+
std::to_string(backend_ctx->adreno_wave_size);
722812
if (has_vector_subgroup_broadcast) {
723813
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
724814
}
@@ -735,12 +825,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
735825
CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_4096 = clCreateKernel(backend_ctx->program_CL_gemv_4096_1_4096, "kernel_gemv_noshuffle", &err), err));
736826

737827
// Gemv 2048, 16384
738-
CL_gemv_compile_opts =
739-
" -cl-std=CL2.0 "
740-
" -cl-mad-enable "
741-
" -DLINE_STRIDE_A=2048 "
742-
" -DBLOCK_STRIDE_A=16384 "
743-
" -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
828+
CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
829+
" -cl-mad-enable "
830+
" -DLINE_STRIDE_A=2048 "
831+
" -DBLOCK_STRIDE_A=16384 "
832+
" -DSIMDGROUP_WIDTH=" +
833+
std::to_string(backend_ctx->adreno_wave_size);
744834
if (has_vector_subgroup_broadcast) {
745835
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
746836
}
@@ -750,12 +840,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
750840
CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_11008 = clCreateKernel(backend_ctx->program_CL_gemv_4096_1_11008, "kernel_gemv_noshuffle", &err), err));
751841

752842
// Gemv 5504, 44032
753-
CL_gemv_compile_opts =
754-
" -cl-std=CL2.0 "
755-
" -cl-mad-enable "
756-
" -DLINE_STRIDE_A=5504 "
757-
" -DBLOCK_STRIDE_A=44032 "
758-
" -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
843+
CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
844+
" -cl-mad-enable "
845+
" -DLINE_STRIDE_A=5504 "
846+
" -DBLOCK_STRIDE_A=44032 "
847+
" -DSIMDGROUP_WIDTH=" +
848+
std::to_string(backend_ctx->adreno_wave_size);
759849
if (has_vector_subgroup_broadcast) {
760850
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
761851
}
@@ -765,12 +855,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
765855
CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_11008_1_4096 = clCreateKernel(backend_ctx->program_CL_gemv_11008_1_4096, "kernel_gemv_noshuffle", &err), err));
766856

767857
// Gemv 16000, 128000
768-
CL_gemv_compile_opts =
769-
" -cl-std=CL2.0 "
770-
" -cl-mad-enable "
771-
" -DLINE_STRIDE_A=16000 "
772-
" -DBLOCK_STRIDE_A=128000 "
773-
" -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
858+
CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
859+
" -cl-mad-enable "
860+
" -DLINE_STRIDE_A=16000 "
861+
" -DBLOCK_STRIDE_A=128000 "
862+
" -DSIMDGROUP_WIDTH=" +
863+
std::to_string(backend_ctx->adreno_wave_size);
774864
if (has_vector_subgroup_broadcast) {
775865
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
776866
}

0 commit comments

Comments
 (0)