@@ -34,6 +34,11 @@ const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
 }
 
 ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    if (size == 0) {
+        // return a dummy buffer for zero-sized allocations
+        return ggml_backend_buffer_init(buft, {}, NULL, 0);
+    }
+
     return buft->iface.alloc_buffer(buft, size);
 }
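With this change a zero-sized request never reaches the backend's `alloc_buffer` hook; it yields a dummy buffer with an empty interface, a NULL base, and size 0. A minimal usage sketch against the public API, assuming the CPU backend is linked in and `ggml_backend_cpu_init` is visible from the included header:

```cpp
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);

    // zero-sized request: returns the dummy buffer instead of calling
    // into the CPU backend's alloc_buffer implementation
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 0);
    printf("size = %zu\n", ggml_backend_buffer_get_size(buf)); // prints 0

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
    return 0;
}
```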
@@ -89,7 +94,7 @@ ggml_backend_buffer_t ggml_backend_buffer_init(
 }
 
 const char * ggml_backend_buffer_name(ggml_backend_buffer_t buffer) {
-    return buffer->iface.get_name(buffer);
+    return ggml_backend_buft_name(ggml_backend_buffer_get_type(buffer));
 }
 
 void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
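Since the per-buffer `get_name` hook is removed (see the interface cleanups below), a buffer's name is now defined as the name of its buffer type. A small sketch of the resulting equivalence, assuming a valid `buf`:

```cpp
#include "ggml-backend.h"
#include <cassert>
#include <cstring>

// sketch: both calls resolve to the buffer type's name after this change
void check_buffer_name(ggml_backend_buffer_t buf) {
    const char * a = ggml_backend_buffer_name(buf);
    const char * b = ggml_backend_buft_name(ggml_backend_buffer_get_type(buf));
    assert(strcmp(a, b) == 0); // equal by construction
}
```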
@@ -108,6 +113,11 @@ size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
 }
 
 void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
+    // get_base is optional if the buffer is zero-sized
+    if (buffer->iface.get_base == NULL && buffer->size == 0) {
+        return NULL;
+    }
+
     void * base = buffer->iface.get_base(buffer);
 
     GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
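The dummy buffer created above carries an empty iface, so `get_base` has to tolerate a missing hook; the guard returns NULL for that one case, while the existing assert still rejects NULL bases from real buffers. A sketch of the resulting contract (the `read_buffer` helper is hypothetical):

```cpp
#include "ggml-backend.h"
#include <cstddef>

// hypothetical helper illustrating the contract of get_base after this change
void read_buffer(ggml_backend_buffer_t buf) {
    void * base = ggml_backend_buffer_get_base(buf);
    if (base == NULL) {
        // only reachable for a zero-sized dummy buffer; any non-empty buffer
        // returning NULL would trip the GGML_ASSERT inside get_base
        return;
    }
    // safe to address up to ggml_backend_buffer_get_size(buf) bytes from base
}
```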
@@ -198,7 +208,7 @@ void ggml_backend_free(ggml_backend_t backend) {
 }
 
 ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend) {
-    return backend->iface.get_default_buffer_type(backend);
+    return ggml_backend_dev_buffer_type(backend->device);
 }
 
 ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) {
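The default buffer type is now resolved through the device, so the wrapper and the explicit device query agree by construction. A small sketch, assuming the backend's device pointer is populated (which this patch makes a hard requirement):

```cpp
#include "ggml-backend.h"
#include <cassert>

// sketch: the wrapper is now shorthand for a device-level query
void check_default_buft(ggml_backend_t backend) {
    ggml_backend_buffer_type_t a = ggml_backend_get_default_buffer_type(backend);
    ggml_backend_buffer_type_t b = ggml_backend_dev_buffer_type(ggml_backend_get_device(backend));
    assert(a == b); // identical by construction after this change
}
```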
@@ -238,43 +248,42 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
 void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
+    if (size == 0) {
+        return;
+    }
+
     GGML_ASSERT(buf != NULL && "tensor buffer not set");
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
 
-    if (!size) {
-        return;
-    }
-
     buf->iface.set_tensor(buf, tensor, data, offset, size);
 }
 
 void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
+    if (size == 0) {
+        return;
+    }
+
     GGML_ASSERT(buf != NULL && "tensor buffer not set");
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
 
-    if (!size) {
-        return;
-    }
-
     buf->iface.get_tensor(buf, tensor, data, offset, size);
 }
 
 GGML_API void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
-    GGML_ASSERT(buf != NULL && "tensor buffer not set");
-    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
-    GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
-
-    if (!size) {
+    if (size == 0) {
         return;
     }
 
-    GGML_ASSERT(buf->iface.memset_tensor != NULL && "memset not supported by backend buffer");
+    GGML_ASSERT(buf != NULL && "tensor buffer not set");
+    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+    GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
+    GGML_ASSERT(buf->iface.memset_tensor != NULL && "memset not implemented by backend buffer");
 
     buf->iface.memset_tensor(buf, tensor, value, offset, size);
 }
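Hoisting the zero-size early-return above the asserts makes a zero-byte transfer a true no-op: it now succeeds even on a tensor whose buffer or data pointer is not set yet, whereas previously the asserts fired first. A minimal sketch, assuming `ggml.h` is available and using `no_alloc` so the tensor's data stays NULL:

```cpp
#include "ggml.h"
#include "ggml-backend.h"

int main() {
    struct ggml_init_params params = {
        /* .mem_size   = */ 1024 * 1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ true, // metadata only: t->data stays NULL
    };
    struct ggml_context * ctx = ggml_init(params);
    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);

    // before this patch these tripped the buf/data asserts;
    // now a zero-sized access returns immediately
    ggml_backend_tensor_set(t, NULL, 0, 0);
    ggml_backend_tensor_get(t, NULL, 0, 0);

    ggml_free(ctx);
    return 0;
}
```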
@@ -316,32 +325,15 @@ enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    // helper to ease transition to device interface
-    if (backend->device) {
-        return ggml_backend_dev_supports_op(backend->device, op);
-    }
-
-    return backend->iface.supports_op(backend, op);
+    return ggml_backend_dev_supports_op(backend->device, op);
 }
 
 bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    // helper to ease transition to device interface
-    if (backend->device) {
-        return ggml_backend_dev_supports_buft(backend->device, buft);
-    }
-    return backend->iface.supports_buft(backend, buft);
+    return ggml_backend_dev_supports_buft(backend->device, buft);
 }
 
 bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    // helper to ease transition to device interface
-    if (backend->device) {
-        return ggml_backend_dev_offload_op(backend->device, op);
-    }
-
-    if (backend->iface.offload_op != NULL) {
-        return backend->iface.offload_op(backend, op);
-    }
-    return false;
+    return ggml_backend_dev_offload_op(backend->device, op);
 }
 
 ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {
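The transition shims are gone: `supports_op`, `supports_buft`, and `offload_op` now delegate unconditionally, which assumes every backend carries a non-NULL `device`. A sketch of the device-level queries the wrappers reduce to (the `can_run` helper is hypothetical):

```cpp
#include "ggml-backend.h"

// hypothetical helper combining the device-level queries that the
// backend wrappers now forward to
bool can_run(ggml_backend_dev_t dev, const struct ggml_tensor * op,
             ggml_backend_buffer_type_t buft) {
    return ggml_backend_dev_supports_op(dev, op) &&
           ggml_backend_dev_supports_buft(dev, buft);
}
```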
@@ -713,12 +705,6 @@ ggml_backend_t ggml_backend_init_best(void) {
 
 // backend CPU
 
-static const char * ggml_backend_cpu_buffer_get_name(ggml_backend_buffer_t buffer) {
-    return "CPU";
-
-    GGML_UNUSED(buffer);
-}
-
 static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
     uintptr_t data = (uintptr_t)buffer->context;
 
@@ -767,7 +753,6 @@ static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t
 }
 
 static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
-    /* .get_name        = */ ggml_backend_cpu_buffer_get_name,
     /* .free_buffer     = */ ggml_backend_cpu_buffer_free_buffer,
     /* .get_base        = */ ggml_backend_cpu_buffer_get_base,
     /* .init_tensor     = */ NULL, // no initialization required
@@ -780,7 +765,6 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
 };
 
 static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
-    /* .get_name        = */ ggml_backend_cpu_buffer_get_name,
     /* .free_buffer     = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
     /* .get_base        = */ ggml_backend_cpu_buffer_get_base,
     /* .init_tensor     = */ NULL, // no initialization required
@@ -799,19 +783,14 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty
 }
 
 static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-    auto alloc_size = size;
-    if (alloc_size == 0) {
-        alloc_size = 1;
-    }
-
-    void * data = ggml_aligned_malloc(alloc_size);
+    void * data = ggml_aligned_malloc(size);
 
     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, alloc_size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
         return NULL;
     }
 
-    return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, alloc_size);
+    return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size);
 }
 
 static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
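The removed bump existed because `malloc(0)` (and hence an aligned wrapper over it) may legally return NULL, which the error path would misread as an allocation failure. With zero-sized requests short-circuited in `ggml_backend_buft_alloc_buffer`, this function only ever sees `size > 0`, so the workaround is unnecessary. A standalone sketch of the original ambiguity, using plain `malloc` rather than the ggml allocator:

```cpp
#include <cstdio>
#include <cstdlib>

int main() {
    // malloc(0) may return NULL or a unique pointer (implementation-defined);
    // a NULL here is NOT an out-of-memory condition, which is why the old
    // code bumped zero-sized requests to 1 byte before the upstream guard existed
    void * p = malloc(0);
    printf("malloc(0) = %p\n", p);
    free(p);
    return 0;
}
```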
@@ -923,12 +902,6 @@ static void ggml_backend_cpu_free(ggml_backend_t backend) {
     delete backend;
 }
 
-static ggml_backend_buffer_type_t ggml_backend_cpu_get_default_buffer_type(ggml_backend_t backend) {
-    return ggml_backend_cpu_buffer_type();
-
-    GGML_UNUSED(backend);
-}
-
 struct ggml_backend_plan_cpu {
     struct ggml_cplan cplan;
     struct ggml_cgraph cgraph;
@@ -998,7 +971,6 @@ static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, s
 static const struct ggml_backend_i ggml_backend_cpu_i = {
     /* .get_name                = */ ggml_backend_cpu_get_name,
     /* .free                    = */ ggml_backend_cpu_free,
-    /* .get_default_buffer_type = */ ggml_backend_cpu_get_default_buffer_type,
     /* .set_tensor_async        = */ NULL,
     /* .get_tensor_async        = */ NULL,
     /* .cpy_tensor_async        = */ NULL,
@@ -1008,9 +980,6 @@ static const struct ggml_backend_i ggml_backend_cpu_i = {
     /* .graph_plan_update       = */ NULL,
     /* .graph_plan_compute      = */ ggml_backend_cpu_graph_plan_compute,
     /* .graph_compute           = */ ggml_backend_cpu_graph_compute,
-    /* .supports_op             = */ NULL,
-    /* .supports_buft           = */ NULL,
-    /* .offload_op              = */ NULL,
     /* .event_record            = */ NULL,
     /* .event_wait              = */ NULL,
 };
@@ -1315,12 +1284,6 @@ struct ggml_backend_multi_buffer_context {
     size_t n_buffers;
 };
 
-static const char * ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer) {
-    ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
-
-    return ctx->buffers[0]->iface.get_name(ctx->buffers[0]);
-}
-
 static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
     for (size_t i = 0; i < ctx->n_buffers; i++) {
@@ -1339,7 +1302,6 @@ static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_
 }
 
 static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = {
-    /* .get_name        = */ ggml_backend_multi_buffer_get_name,
     /* .free_buffer     = */ ggml_backend_multi_buffer_free_buffer,
     /* .get_base        = */ NULL,
     /* .init_tensor     = */ NULL,
@@ -1368,7 +1330,7 @@ ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer
 }
 
 bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer) {
-    return buffer->iface.get_name == ggml_backend_multi_buffer_get_name;
+    return buffer->iface.free_buffer == ggml_backend_multi_buffer_free_buffer;
 }
 
 void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
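With `get_name` gone from the buffer interface, `ggml_backend_buffer_is_multi_buffer` identifies the buffer kind by comparing a different iface function pointer, `free_buffer`; the idiom works with any member as long as exactly one buffer implementation uses that function. A standalone miniature of the pattern, with hypothetical types rather than the ggml API:

```cpp
#include <cstdio>

// hypothetical miniature of the iface-pointer identity idiom
struct buffer_iface { void (*free_buffer)(void * ctx); };
struct buffer       { buffer_iface iface; void * ctx; };

static void multi_free(void * /*ctx*/) { /* would free the child buffers */ }

// a buffer "is multi" iff its iface carries the multi implementation
static bool is_multi(const buffer & b) {
    return b.iface.free_buffer == multi_free;
}

int main() {
    buffer b = { { multi_free }, nullptr };
    printf("is_multi = %d\n", is_multi(b)); // prints 1
    return 0;
}
```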