@@ -63,7 +63,7 @@ static int g_ggml_sycl_debug = 0;
63
63
// Defines SYCL_USE_XMX (XMX on Intel GPUs) unless the build requests the MMQ
// kernels by defining GGML_SYCL_FORCE_MMQ.
// TODO: the macro is currently not really used for XMX.
#ifndef GGML_SYCL_FORCE_MMQ
#define SYCL_USE_XMX
#endif
68
68
69
69
// max batch size to use MMQ kernels when tensor cores are available
@@ -84,16 +84,16 @@ static int g_ggml_sycl_debug = 0;
84
84
// Alias for a raw pointer to a SYCL queue.
using queue_ptr = sycl::queue *;
85
85
86
86
// GPU mode of the SYCL backend.
// NOTE(review): judging by the names, SINGLE/MUL distinguish single-GPU from
// multi-GPU operation -- confirm against the code that sets the mode.
enum ggml_sycl_backend_gpu_mode {
    SYCL_UNSET_GPU_MODE  = -1,  // initial value of g_ggml_sycl_backend_gpu_mode
    SYCL_SINGLE_GPU_MODE = 0,
    SYCL_MUL_GPU_MODE    = 1,   // same value as the implicit "previous + 1"
};
91
91
92
92
static_assert (sizeof (sycl::half) == sizeof(ggml_fp16_t ), "wrong fp16 size");
93
93
94
94
// Deliberately crashes the process by storing through a null pointer.
// ggml_sycl_set_device calls it from its exception handler after printing the
// error, immediately before std::exit(1).
static void crash() {
    // The pointer must be volatile-qualified: a store through a plain null
    // pointer is undefined behaviour that optimizers may delete outright
    // (clang documents this under -Wnull-dereference), in which case this
    // function would return silently instead of faulting.
    volatile int * ptr = nullptr;
    *ptr = 0;
}
98
98
99
99
[[noreturn]] static void ggml_sycl_error (
@@ -102,9 +102,9 @@ static void crash() {
102
102
const char * file,
103
103
const int line,
104
104
const char * msg) {
105
- fprintf (stderr, " SYCL error: %s: %s\n " , stmt, msg);
106
- fprintf (stderr, " in function %s at %s:%d\n " , func, file, line);
107
- GGML_ABORT (" SYCL error" );
105
+ fprintf (stderr, " SYCL error: %s: %s\n " , stmt, msg);
106
+ fprintf (stderr, " in function %s at %s:%d\n " , func, file, line);
107
+ GGML_ABORT (" SYCL error" );
108
108
}
109
109
110
110
#define SYCL_CHECK (err ) \
@@ -142,40 +142,40 @@ static int g_all_sycl_device_count = -1;
142
142
static bool g_ggml_backend_sycl_buffer_type_initialized = false ;
143
143
144
144
static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
145
- SYCL_UNSET_GPU_MODE;
145
+ SYCL_UNSET_GPU_MODE;
146
146
147
147
static void * g_scratch_buffer = nullptr ;
148
148
static size_t g_scratch_size = 0 ; // disabled by default
149
149
static size_t g_scratch_offset = 0 ;
150
150
151
151
// Writes an "unsupported GPU architecture" error to the given SYCL stream and
// then terminates the program with exit status 1. Never returns.
[[noreturn]] static inline void bad_arch(const sycl::stream & stream_ct1) {
    stream_ct1 << " ERROR: ggml-sycl was compiled without support "
                  "for the  current GPU architecture.\n ";
    // __trap();
    std::exit(1);

    // Not reached at run time; merely names the function to suppress the
    // unused function warning.
    (void) bad_arch;
}
159
159
160
160
int get_current_device_id ();
161
161
162
162
// Selects `device` as the current device.
//
// Queries the current device id first. When it already equals `device`,
// nothing is selected and 0 is returned; otherwise the result of
// CHECK_TRY_ERROR(dpct::select_device(device)) is returned.
//
// A sycl::exception escaping the body is reported on std::cerr, after which
// the process is taken down via crash() followed by std::exit(1).
inline dpct::err0 ggml_sycl_set_device(const int device) try {
    int active_device_id;
    SYCL_CHECK(CHECK_TRY_ERROR(active_device_id = get_current_device_id()));

    // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
    // current_device_id=%d\n", device, active_device_id);
    if (device != active_device_id) {
        return CHECK_TRY_ERROR(dpct::select_device(device));
    }

    // The requested device is already the current one.
    return 0;
} catch (sycl::exception const & exc) {
    std::cerr << exc.what() << " Exception caught at file:" << __FILE__
              << " , line:" << __LINE__ << std::endl;
    crash();
    std::exit(1);
}
180
180
181
181
// ////////////////////
@@ -253,10 +253,10 @@ struct ggml_sycl_pool_alloc {
253
253
// backend interface
254
254
255
255
struct ggml_tensor_extra_gpu {
256
- void * data_device[GGML_SYCL_MAX_DEVICES]; // 1 pointer for each device for split
257
- // tensors
258
- dpct::event_ptr events[GGML_SYCL_MAX_DEVICES]
259
- [GGML_SYCL_MAX_STREAMS]; // events for synchronizing multiple GPUs
256
+ void * data_device[GGML_SYCL_MAX_DEVICES]; // 1 pointer for each device for split
257
+ // tensors
258
+ dpct::event_ptr events[GGML_SYCL_MAX_DEVICES]
259
+ [GGML_SYCL_MAX_STREAMS]; // events for synchronizing multiple GPUs
260
260
};
261
261
262
262
struct ggml_backend_sycl_context {
0 commit comments