1
- #define CL_TARGET_OPENCL_VERSION 220
1
+ #define CL_TARGET_OPENCL_VERSION GGML_OPENCL_TARGET_VERSION
2
2
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
3
3
4
4
// suppress warnings in CL headers for GCC and Clang
25
25
#include < vector>
26
26
#include < string>
27
27
#include < cmath>
28
+ #include < memory>
29
+ #include < charconv>
28
30
29
31
#undef MIN
30
32
#undef MAX
@@ -62,6 +64,97 @@ enum ADRENO_GPU_GEN {
62
64
X1E,
63
65
};
64
66
67
+ struct ggml_cl_version {
68
+ cl_uint major = 0 ;
69
+ cl_uint minor = 0 ;
70
+ };
71
+
72
+ // Parses a version string of form "XX.YY ". On an error returns ggml_cl_version with all zeroes.
73
+ static ggml_cl_version parse_cl_version (std::string_view str) {
74
+ size_t major_str_begin = 0 ;
75
+ size_t major_str_end = str.find (" ." , major_str_begin);
76
+ if (major_str_end == std::string::npos) {
77
+ return {};
78
+ }
79
+
80
+ size_t minor_str_begin = major_str_end + 1 ;
81
+ size_t minor_str_end = str.find (" " , minor_str_begin);
82
+ if (minor_str_end == std::string::npos) {
83
+ return {};
84
+ }
85
+
86
+ cl_uint version_major;
87
+ if (std::from_chars (str.data () + major_str_begin, str.data () + major_str_end, version_major).ec != std::errc{}) {
88
+ return {};
89
+ }
90
+
91
+ cl_uint version_minor;
92
+ if (std::from_chars (str.data () + minor_str_begin, str.data () + minor_str_end, version_minor).ec != std::errc{}) {
93
+ return {};
94
+ }
95
+ return { version_major, version_minor };
96
+ }
97
+
98
+ // Returns OpenCL platform's version. On an error returns ggml_cl_version with all zeroes.
99
+ static ggml_cl_version get_opencl_platform_version (cl_platform_id platform) {
100
+ size_t param_size;
101
+ CL_CHECK (clGetPlatformInfo (platform, CL_PLATFORM_VERSION, 0 , nullptr , ¶m_size));
102
+ std::unique_ptr<char []> param_storage (new char [param_size]);
103
+ CL_CHECK (clGetPlatformInfo (platform, CL_PLATFORM_VERSION, param_size, param_storage.get (), nullptr ));
104
+
105
+ auto param_value = std::string_view (param_storage.get (), param_size);
106
+ const std::string version_prefix = " OpenCL " ; // Suffix: "XX.YY <platform-specific-info>"
107
+ if (param_value.find (version_prefix) != 0 ) {
108
+ return {};
109
+ }
110
+ param_value.remove_prefix (version_prefix.length ());
111
+ return parse_cl_version (param_value);
112
+ }
113
+
114
+ // Return a version to use in OpenCL C compilation. On an error returns ggml_cl_version with all zeroes.
115
+ static ggml_cl_version get_opencl_c_version (ggml_cl_version platform_version, cl_device_id device) {
116
+ size_t param_size;
117
+
118
+ #if CL_TARGET_OPENCL_VERSION >= 300
119
+ if (platform_version.major >= 3 ) {
120
+ CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0 , nullptr , ¶m_size));
121
+ if (!param_size) {
122
+ return {};
123
+ }
124
+
125
+ std::unique_ptr<cl_name_version[]> versions (new cl_name_version[param_size]);
126
+ CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, param_size, versions.get (), nullptr ));
127
+ unsigned versions_count = param_size / sizeof (cl_name_version);
128
+
129
+ cl_version version_max = 0 ;
130
+ for (unsigned i = 0 ; i < versions_count; i++) {
131
+ version_max = std::max<cl_version>(versions[i].version , version_max);
132
+ }
133
+
134
+ return { CL_VERSION_MAJOR (version_max), CL_VERSION_MINOR (version_max) };
135
+ }
136
+ #else
137
+ GGML_UNUSED (platform_version);
138
+ #endif // CL_TARGET_OPENCL_VERSION >= 300
139
+
140
+ CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_VERSION, 0 , nullptr , ¶m_size));
141
+ if (!param_size) {
142
+ return {};
143
+ }
144
+
145
+ std::unique_ptr<char []> param_storage (new char [param_size]);
146
+ CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_VERSION, param_size, param_storage.get (), nullptr ));
147
+ auto param_value = std::string_view (param_storage.get (), param_size);
148
+
149
+ const std::string version_prefix = " OpenCL C " ; // Suffix: "XX.YY <platform-specific-info>"
150
+ if (param_value.find (version_prefix) != 0 ) {
151
+ return {};
152
+ }
153
+ param_value.remove_prefix (version_prefix.length ());
154
+
155
+ return parse_cl_version (param_value);
156
+ }
157
+
65
158
static ADRENO_GPU_GEN get_adreno_gpu_gen (const char *device_name) {
66
159
if (strstr (device_name, " 730" ) ||
67
160
strstr (device_name, " 740" ) ||
@@ -470,16 +563,11 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
470
563
// A local ref of cl_device_id for convenience
471
564
cl_device_id device = backend_ctx->device ;
472
565
566
+ ggml_cl_version platform_version = get_opencl_platform_version (default_device->platform ->id );
567
+
473
568
// Check device OpenCL version, OpenCL 2.0 or above is required
474
- size_t device_ver_str_size;
475
- clGetDeviceInfo (device, CL_DEVICE_VERSION, 0 , NULL , &device_ver_str_size);
476
- char *device_ver_buffer = (char *)alloca (device_ver_str_size + 1 );
477
- clGetDeviceInfo (device, CL_DEVICE_VERSION, device_ver_str_size, device_ver_buffer, NULL );
478
- device_ver_buffer[device_ver_str_size] = ' \0 ' ;
479
- GGML_LOG_INFO (" ggml_opencl: device OpenCL version: %s\n " , device_ver_buffer);
480
-
481
- if (strstr (device_ver_buffer, " OpenCL 2" ) == NULL &&
482
- strstr (device_ver_buffer, " OpenCL 3" ) == NULL ) {
569
+ ggml_cl_version opencl_c_version = get_opencl_c_version (platform_version, device);
570
+ if (opencl_c_version.major < 2 ) {
483
571
GGML_LOG_ERROR (" ggml_opencl: OpenCL 2.0 or above is required\n " );
484
572
return backend_ctx;
485
573
}
@@ -516,8 +604,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
516
604
517
605
// If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
518
606
// optional in OpenCL 3.0 (cl_khr_subgroup is mandatory in OpenCL 2.x)
519
- if (strstr (device_ver_buffer, " OpenCL 3" ) &&
520
- strstr (ext_buffer, " cl_khr_subgroups" ) == NULL &&
607
+ if (opencl_c_version.major == 3 && strstr (ext_buffer, " cl_khr_subgroups" ) == NULL &&
521
608
strstr (ext_buffer, " cl_intel_subgroups" ) == NULL ) {
522
609
GGML_LOG_ERROR (" ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) "
523
610
" (note that subgroups is an optional feature in OpenCL 3.0)\n " );
@@ -581,9 +668,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
581
668
const std::string kernel_src = read_file (" ggml-opencl.cl" );
582
669
#endif
583
670
584
- std::string compile_opts =
585
- " -cl-std=CL2.0 -cl-mad-enable -cl-unsafe-math-optimizations "
586
- " -cl-finite-math-only -cl-fast-relaxed-math " ;
671
+ auto opencl_c_std =
672
+ std::string (" CL" ) + std::to_string (opencl_c_version.major ) + " ." + std::to_string (opencl_c_version.minor );
673
+
674
+ std::string compile_opts = std::string (" -cl-std=" ) + opencl_c_std +
675
+ " -cl-mad-enable -cl-unsafe-math-optimizations"
676
+ " -cl-finite-math-only -cl-fast-relaxed-math" ;
587
677
backend_ctx->program = build_program_from_source (context, device, kernel_src.c_str (), compile_opts);
588
678
589
679
// Non matmul kernels.
@@ -693,10 +783,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
693
783
CL_CHECK ((backend_ctx->kernel_transpose_16 = clCreateKernel (backend_ctx->program_transpose_16 , " kernel_transpose_16" , &err), err));
694
784
695
785
// Gemv general
696
- std::string CL_gemv_compile_opts =
697
- " -cl-std=CL2.0 "
698
- " -cl-mad-enable "
699
- " -DSIMDGROUP_WIDTH= " + std::to_string (backend_ctx->adreno_wave_size );
786
+ std::string CL_gemv_compile_opts = std::string ( " -cl-std= " ) + opencl_c_std +
787
+ " -cl-mad-enable "
788
+ " -DSIMDGROUP_WIDTH= " +
789
+ std::to_string (backend_ctx->adreno_wave_size );
700
790
if (has_vector_subgroup_broadcast) {
701
791
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT " ;
702
792
}
@@ -713,12 +803,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
713
803
CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_general = clCreateKernel (backend_ctx->program_CL_gemv_general , " kernel_gemv_noshuffle" , &err), err));
714
804
715
805
// Gemv 2048, 16384
716
- CL_gemv_compile_opts =
717
- " -cl-std=CL2.0 "
718
- " -cl-mad-enable "
719
- " -DLINE_STRIDE_A=2048 "
720
- " -DBLOCK_STRIDE_A=16384 "
721
- " -DSIMDGROUP_WIDTH= " + std::to_string (backend_ctx->adreno_wave_size );
806
+ CL_gemv_compile_opts = std::string ( " -cl-std= " ) + opencl_c_std +
807
+ " -cl-mad-enable "
808
+ " -DLINE_STRIDE_A=2048 "
809
+ " -DBLOCK_STRIDE_A=16384 "
810
+ " -DSIMDGROUP_WIDTH= " +
811
+ std::to_string (backend_ctx->adreno_wave_size );
722
812
if (has_vector_subgroup_broadcast) {
723
813
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT " ;
724
814
}
@@ -735,12 +825,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
735
825
CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_4096 = clCreateKernel (backend_ctx->program_CL_gemv_4096_1_4096 , " kernel_gemv_noshuffle" , &err), err));
736
826
737
827
// Gemv 2048, 16384
738
- CL_gemv_compile_opts =
739
- " -cl-std=CL2.0 "
740
- " -cl-mad-enable "
741
- " -DLINE_STRIDE_A=2048 "
742
- " -DBLOCK_STRIDE_A=16384 "
743
- " -DSIMDGROUP_WIDTH= " + std::to_string (backend_ctx->adreno_wave_size );
828
+ CL_gemv_compile_opts = std::string ( " -cl-std= " ) + opencl_c_std +
829
+ " -cl-mad-enable "
830
+ " -DLINE_STRIDE_A=2048 "
831
+ " -DBLOCK_STRIDE_A=16384 "
832
+ " -DSIMDGROUP_WIDTH= " +
833
+ std::to_string (backend_ctx->adreno_wave_size );
744
834
if (has_vector_subgroup_broadcast) {
745
835
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT " ;
746
836
}
@@ -750,12 +840,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
750
840
CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_11008 = clCreateKernel (backend_ctx->program_CL_gemv_4096_1_11008 , " kernel_gemv_noshuffle" , &err), err));
751
841
752
842
// Gemv 5504, 44032
753
- CL_gemv_compile_opts =
754
- " -cl-std=CL2.0 "
755
- " -cl-mad-enable "
756
- " -DLINE_STRIDE_A=5504 "
757
- " -DBLOCK_STRIDE_A=44032 "
758
- " -DSIMDGROUP_WIDTH= " + std::to_string (backend_ctx->adreno_wave_size );
843
+ CL_gemv_compile_opts = std::string ( " -cl-std= " ) + opencl_c_std +
844
+ " -cl-mad-enable "
845
+ " -DLINE_STRIDE_A=5504 "
846
+ " -DBLOCK_STRIDE_A=44032 "
847
+ " -DSIMDGROUP_WIDTH= " +
848
+ std::to_string (backend_ctx->adreno_wave_size );
759
849
if (has_vector_subgroup_broadcast) {
760
850
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT " ;
761
851
}
@@ -765,12 +855,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
765
855
CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_11008_1_4096 = clCreateKernel (backend_ctx->program_CL_gemv_11008_1_4096 , " kernel_gemv_noshuffle" , &err), err));
766
856
767
857
// Gemv 16000, 128000
768
- CL_gemv_compile_opts =
769
- " -cl-std=CL2.0 "
770
- " -cl-mad-enable "
771
- " -DLINE_STRIDE_A=16000 "
772
- " -DBLOCK_STRIDE_A=128000 "
773
- " -DSIMDGROUP_WIDTH= " + std::to_string (backend_ctx->adreno_wave_size );
858
+ CL_gemv_compile_opts = std::string ( " -cl-std= " ) + opencl_c_std +
859
+ " -cl-mad-enable "
860
+ " -DLINE_STRIDE_A=16000 "
861
+ " -DBLOCK_STRIDE_A=128000 "
862
+ " -DSIMDGROUP_WIDTH= " +
863
+ std::to_string (backend_ctx->adreno_wave_size );
774
864
if (has_vector_subgroup_broadcast) {
775
865
CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT " ;
776
866
}
0 commit comments