@@ -63,7 +63,7 @@ static int g_ggml_sycl_debug = 0;
 // define for XMX in Intel GPU
 // TODO: currently, it's not used for XMX really.
 #if !defined(GGML_SYCL_FORCE_MMQ)
-#define SYCL_USE_XMX
+#define SYCL_USE_XMX
 #endif

 // max batch size to use MMQ kernels when tensor cores are available
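The hunk above sets up the default-on XMX toggle: unless GGML_SYCL_FORCE_MMQ is defined at build time, SYCL_USE_XMX gets defined (the TODO notes it is not yet really used for XMX). As a minimal, self-contained sketch of that guard pattern, assuming nothing beyond the two macro names shown in the hunk, the illustrative program below only reports which path a translation unit would take:

#include <cstdio>

#if !defined(GGML_SYCL_FORCE_MMQ)
#define SYCL_USE_XMX
#endif

int main() {
#ifdef SYCL_USE_XMX
    std::puts("default path: SYCL_USE_XMX defined (GGML_SYCL_FORCE_MMQ not set)");
#else
    std::puts("forced MMQ: GGML_SYCL_FORCE_MMQ defined, SYCL_USE_XMX left undefined");
#endif
    return 0;
}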
@@ -84,16 +84,16 @@ static int g_ggml_sycl_debug = 0;
 typedef sycl::queue *queue_ptr;

 enum ggml_sycl_backend_gpu_mode {
-    SYCL_UNSET_GPU_MODE = -1,
-    SYCL_SINGLE_GPU_MODE = 0,
-    SYCL_MUL_GPU_MODE
+    SYCL_UNSET_GPU_MODE = -1,
+    SYCL_SINGLE_GPU_MODE = 0,
+    SYCL_MUL_GPU_MODE
 };

 static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");

 static void crash() {
-    int * ptr = NULL;
-    *ptr = 0;
+    int * ptr = NULL;
+    *ptr = 0;
 }

 [[noreturn]] static void ggml_sycl_error(
@@ -102,9 +102,9 @@ static void crash() {
     const char * file,
     const int line,
     const char * msg) {
-    fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
-    fprintf(stderr, "  in function %s at %s:%d\n", func, file, line);
-    GGML_ABORT("SYCL error");
+    fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
+    fprintf(stderr, "  in function %s at %s:%d\n", func, file, line);
+    GGML_ABORT("SYCL error");
 }

 #define SYCL_CHECK(err) \
@@ -141,40 +141,40 @@ static int g_all_sycl_device_count = -1;
 static bool g_ggml_backend_sycl_buffer_type_initialized = false;

 static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
-    SYCL_UNSET_GPU_MODE;
+    SYCL_UNSET_GPU_MODE;

 static void * g_scratch_buffer = nullptr;
 static size_t g_scratch_size = 0; // disabled by default
 static size_t g_scratch_offset = 0;

 [[noreturn]] static inline void bad_arch(const sycl::stream& stream_ct1) {
-    stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
-                  "current GPU architecture.\n";
-    // __trap();
-    std::exit(1);
+    stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
+                  "current GPU architecture.\n";
+    // __trap();
+    std::exit(1);

-    (void)bad_arch; // suppress unused function warning
+    (void)bad_arch; // suppress unused function warning
 }

 int get_current_device_id();

 inline dpct::err0 ggml_sycl_set_device(const int device) try {

-    int current_device_id;
-    SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));
+    int current_device_id;
+    SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));

-    // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
-    // current_device_id=%d\n", device, current_device);
-    if (device == current_device_id) {
-        return 0;
-    }
+    // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
+    // current_device_id=%d\n", device, current_device);
+    if (device == current_device_id) {
+        return 0;
+    }

-    return CHECK_TRY_ERROR(dpct::select_device(device));
+    return CHECK_TRY_ERROR(dpct::select_device(device));
 } catch (sycl::exception const & exc) {
-    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-              << ", line:" << __LINE__ << std::endl;
-    crash();
-    std::exit(1);
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    crash();
+    std::exit(1);
 }

 // ////////////////////
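The hunk above illustrates the backend's error-handling convention: calls that can fail are wrapped in SYCL_CHECK(CHECK_TRY_ERROR(...)), and ggml_sycl_set_device is a function-try-block that turns a sycl::exception into an error code. The following SYCL-free sketch mirrors that convention under stated assumptions; DEMO_CHECK, demo_error and demo_set_device are hypothetical stand-ins for SYCL_CHECK/CHECK_TRY_ERROR, ggml_sycl_error and ggml_sycl_set_device, not the real backend code:

#include <cstdio>
#include <cstdlib>
#include <stdexcept>

// Report the failing statement with file/line context and abort (mirrors ggml_sycl_error).
[[noreturn]] static void demo_error(const char * stmt, const char * func, const char * file, int line) {
    std::fprintf(stderr, "error: %s\n  in function %s at %s:%d\n", stmt, func, file, line);
    std::abort();
}

// Evaluate an expression that returns 0 on success; on failure, report and abort (mirrors SYCL_CHECK).
#define DEMO_CHECK(err)                                     \
    do {                                                    \
        int err_ = (err);                                   \
        if (err_ != 0) {                                    \
            demo_error(#err, __func__, __FILE__, __LINE__); \
        }                                                   \
    } while (0)

// Function-try-block, as in ggml_sycl_set_device: any exception becomes a nonzero return code.
static int demo_set_device(int device) try {
    if (device < 0) {
        throw std::runtime_error("invalid device id");
    }
    return 0;  // pretend the device switch succeeded
} catch (const std::exception & exc) {
    std::fprintf(stderr, "%s: exception caught at %s:%d\n", exc.what(), __FILE__, __LINE__);
    return 1;
}

int main() {
    DEMO_CHECK(demo_set_device(0));   // passes silently
    DEMO_CHECK(demo_set_device(-1));  // prints the error and aborts
    return 0;
}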
@@ -252,10 +252,10 @@ struct ggml_sycl_pool_alloc {
 // backend interface

 struct ggml_tensor_extra_gpu {
-    void * data_device[GGML_SYCL_MAX_DEVICES]; // 1 pointer for each device for split
-                                               // tensors
-    dpct::event_ptr events[GGML_SYCL_MAX_DEVICES]
-                          [GGML_SYCL_MAX_STREAMS]; // events for synchronizing multiple GPUs
+    void * data_device[GGML_SYCL_MAX_DEVICES]; // 1 pointer for each device for split
+                                               // tensors
+    dpct::event_ptr events[GGML_SYCL_MAX_DEVICES]
+                          [GGML_SYCL_MAX_STREAMS]; // events for synchronizing multiple GPUs
 };

 struct ggml_backend_sycl_context {
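The final hunk touches ggml_tensor_extra_gpu, which keeps one data pointer per device for split tensors plus a per-device, per-stream event table for synchronizing multiple GPUs. Below is a minimal sketch of just that layout in plain C++; MAX_DEVICES, MAX_STREAMS and demo_event_t are hypothetical stand-ins for GGML_SYCL_MAX_DEVICES, GGML_SYCL_MAX_STREAMS and dpct::event_ptr:

#include <array>

constexpr int MAX_DEVICES = 16;  // stand-in for GGML_SYCL_MAX_DEVICES
constexpr int MAX_STREAMS = 8;   // stand-in for GGML_SYCL_MAX_STREAMS

using demo_event_t = void *;     // placeholder for a backend event handle (dpct::event_ptr in the real code)

struct demo_tensor_extra_gpu {
    // one pointer per device: a split tensor keeps a slice of its data on each GPU
    std::array<void *, MAX_DEVICES> data_device{};
    // one event per (device, stream) pair, used to synchronize work across GPUs
    std::array<std::array<demo_event_t, MAX_STREAMS>, MAX_DEVICES> events{};
};

int main() {
    demo_tensor_extra_gpu extra;      // zero-initialized bookkeeping
    extra.data_device[0] = nullptr;   // would hold device 0's slice of the tensor
    extra.events[0][0]   = nullptr;   // would hold the event for (device 0, stream 0)
    return 0;
}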