@@ -212,6 +212,28 @@ static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
 
 static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 
+[[noreturn]]
+static void ggml_cuda_error(const char * stmt, const char * func, const char * file, const int line, const char * msg) {
+    int id = -1; // in case cudaGetDevice fails
+    cudaGetDevice(&id);
+
+    fprintf(stderr, "CUDA error: %s\n", msg);
+    fprintf(stderr, "  current device: %d, in function %s at %s:%d\n", id, func, file, line);
+    fprintf(stderr, "  %s\n", stmt);
+    // abort with GGML_ASSERT to get a stack trace
+    GGML_ASSERT(!"CUDA error");
+}
+
+#define CUDA_CHECK_GEN(err, success, error_fn)                                    \
+    do {                                                                          \
+        auto err_ = (err);                                                        \
+        if (err_ != (success)) {                                                  \
+            ggml_cuda_error(#err, __func__, __FILE__, __LINE__, error_fn(err_));  \
+        }                                                                         \
+    } while (0)
+
+#define CUDA_CHECK(err) CUDA_CHECK_GEN(err, cudaSuccess, cudaGetErrorString)
+
 #if CUDART_VERSION >= 12000
 static const char * cublas_get_error_str(const cublasStatus_t err) {
     return cublasGetStatusString(err);
@@ -233,23 +255,16 @@ static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 }
 #endif // CUDART_VERSION >= 12000
 
-[[noreturn]]
-static void ggml_cuda_error(const char * stmt, const char * func, const char * file, const int line, const char * msg) {
-    fprintf(stderr, "CUDA error: %s: %s\n", stmt, msg);
-    fprintf(stderr, "  in function %s at %s:%d\n", func, file, line);
-    GGML_ASSERT(!"CUDA error");
-}
+#define CUBLAS_CHECK(err) CUDA_CHECK_GEN(err, CUBLAS_STATUS_SUCCESS, cublas_get_error_str)
 
-#define CUDA_CHECK(err) do { auto err_ = (err); if (err_ != cudaSuccess) ggml_cuda_error(#err, __func__, __FILE__, __LINE__, cudaGetErrorString(err_)); } while (0)
-#define CUBLAS_CHECK(err) do { auto err_ = (err); if (err_ != CUBLAS_STATUS_SUCCESS) ggml_cuda_error(#err, __func__, __FILE__, __LINE__, cublas_get_error_str(err_)); } while (0)
 
 #if !defined(GGML_USE_HIPBLAS)
 static const char * cu_get_error_str(CUresult err) {
     const char * err_str;
     cuGetErrorString(err, &err_str);
     return err_str;
 }
-#define CU_CHECK(err) do { auto err_ = (err); if (err_ != CUDA_SUCCESS) ggml_cuda_error(#err, __func__, __FILE__, __LINE__, cu_get_error_str(err_)); } while (0)
+#define CU_CHECK(err) CUDA_CHECK_GEN(err, CUDA_SUCCESS, cu_get_error_str)
 #endif
 
 #if CUDART_VERSION >= 11100
@@ -538,7 +553,6 @@ struct cuda_device_capabilities {
 
 static cuda_device_capabilities g_device_caps[GGML_CUDA_MAX_DEVICES] = { {0, false, 0} };
 
-
 static void * g_scratch_buffer = nullptr;
 static size_t g_scratch_size = 0; // disabled by default
 static size_t g_scratch_offset = 0;
@@ -4727,7 +4741,6 @@ static __global__ void mul_mat_p021_f16_f32(
 
     const int row_y = col_x;
 
-
     // y is not transposed but permuted
     const int iy = channel*nrows_y + row_y;
 
@@ -7209,7 +7222,6 @@ inline void ggml_cuda_op_norm(
     (void) src1_dd;
 }
 
-
 inline void ggml_cuda_op_group_norm(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
     const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
@@ -7784,7 +7796,6 @@ inline void ggml_cuda_op_im2col(
     (void) src0_dd;
 }
 
-
 inline void ggml_cuda_op_sum_rows(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
     const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
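A minimal usage sketch of the consolidated macros at a hypothetical call site (not part of the diff; the buffer, size, and `stream` variable are assumptions for illustration):

    // CUDA_CHECK expands through CUDA_CHECK_GEN: any return value other than
    // cudaSuccess is passed to ggml_cuda_error(), which prints the failing
    // statement, device, and location, then aborts via GGML_ASSERT.
    float * buf = nullptr;
    CUDA_CHECK(cudaMalloc((void **) &buf, 1024 * sizeof(float)));
    CUDA_CHECK(cudaMemsetAsync(buf, 0, 1024 * sizeof(float), stream)); // assumes an existing cudaStream_t `stream`
    CUDA_CHECK(cudaFree(buf));

CUBLAS_CHECK and CU_CHECK follow the same pattern, differing only in the success constant and error-string function handed to CUDA_CHECK_GEN.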