 
 // backend buffer type
 
-ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
     return buft->iface.alloc_buffer(buft, size);
 }
 
 size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
     return buft->iface.get_alignment(buft);
 }
 
-size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
+GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
     // get_alloc_size is optional, defaults to ggml_nbytes
     if (buft->iface.get_alloc_size) {
         return buft->iface.get_alloc_size(buft, tensor);
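Note (not part of the diff): GGML_CALL is a macro defined in ggml.h whose purpose is to pin these functions to a single calling convention so they can be called safely across object files produced by different compilers. A minimal sketch of that kind of macro follows; the GGML_MULTIPLATFORM guard and the __ms_abi__ attribute are assumptions recalled from headers of this era, so check ggml.h for the authoritative definition.

// Sketch of a calling-convention macro of the kind ggml.h defines (assumption,
// not copied from the source): empty by default, an explicit ABI attribute only
// when objects built by different compilers must share function pointers.
#ifdef GGML_MULTIPLATFORM
#    if defined(_WIN32)
#        define GGML_CALL
#    else
#        define GGML_CALL __attribute__((__ms_abi__))
#    endif
#else
#    define GGML_CALL
#endif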
@@ -44,7 +44,7 @@ bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
 
 // backend buffer
 
-ggml_backend_buffer_t ggml_backend_buffer_init(
+GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
         ggml_backend_buffer_type_t buft,
         struct ggml_backend_buffer_i iface,
         ggml_backend_buffer_context_t context,
@@ -86,7 +86,7 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
     return base;
 }
 
-void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+GGML_CALL void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     // init_tensor is optional
     if (buffer->iface.init_tensor) {
         buffer->iface.init_tensor(buffer, tensor);
@@ -156,15 +156,15 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
     backend->iface.get_tensor_async(backend, tensor, data, offset, size);
 }
 
-void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(tensor->buffer != NULL && "tensor buffer not set");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
 
     tensor->buffer->iface.set_tensor(tensor->buffer, tensor, data, offset, size);
 }
 
-void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(tensor->buffer != NULL && "tensor buffer not set");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
@@ -271,9 +271,9 @@ struct ggml_backend_reg {
 static struct ggml_backend_reg ggml_backend_registry[GGML_MAX_BACKENDS_REG];
 static size_t ggml_backend_registry_count = 0;
 
-static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data);
+GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data);
 
-static void ggml_backend_registry_init(void) {
+GGML_CALL static void ggml_backend_registry_init(void) {
     static bool initialized = false;
 
     if (initialized) {
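Worth noting for this hunk: ggml_backend_reg_cpu_init is only forward-declared here and defined near the end of the file, and a calling-convention attribute is part of the function's type, so the declaration and the definition must carry the same annotation or the compiler treats them as conflicting. A tiny self-contained sketch of that rule (MY_CALL is a stand-in for GGML_CALL and expands to nothing so it builds anywhere):

// Standalone sketch, not ggml code: a calling-convention macro must appear on
// both the forward declaration and the definition of a function.
#define MY_CALL /* e.g. an ABI attribute in a mixed-compiler build */

MY_CALL static int helper(int x);   // forward declaration carries the macro

int main(void) {
    return helper(21) == 42 ? 0 : 1;
}

MY_CALL static int helper(int x) {  // the definition carries it as well
    return x * 2;
}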
@@ -392,39 +392,39 @@ ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size) {
 
 // backend CPU
 
-static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
+GGML_CALL static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
     return (void *)buffer->context;
 }
 
-static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+GGML_CALL static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     free(buffer->context);
 }
 
-static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+GGML_CALL static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     memcpy((char *)tensor->data + offset, data, size);
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+GGML_CALL static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     memcpy(data, (const char *)tensor->data + offset, size);
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_cpy_tensor_from(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
+GGML_CALL static void ggml_backend_cpu_buffer_cpy_tensor_from(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
     ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_cpy_tensor_to(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
+GGML_CALL static void ggml_backend_cpu_buffer_cpy_tensor_to(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
     ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src));
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+GGML_CALL static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
     memset(buffer->context, value, buffer->size);
 }
 
@@ -453,7 +453,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
 
 static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
 
-static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+GGML_CALL static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
     size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
     void * data = malloc(size); // TODO: maybe use GGML_ALIGNED_MALLOC?
 
@@ -462,25 +462,25 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     return ggml_backend_buffer_init(buft, cpu_backend_buffer_i, data, size);
 }
 
-static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
+GGML_CALL static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
     return TENSOR_ALIGNMENT;
 
     GGML_UNUSED(buft);
 }
 
-static bool ggml_backend_cpu_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
+GGML_CALL static bool ggml_backend_cpu_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
     return ggml_backend_is_cpu(backend);
 
     GGML_UNUSED(buft);
 }
 
-static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
+GGML_CALL static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
     return true;
 
     GGML_UNUSED(buft);
 }
 
-ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
+GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
     static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
         /* .iface = */ {
             /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
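The annotated static functions above end up as fields of the buffer-type interface struct, i.e. they are invoked through function pointers. Calling a function through a pointer whose type declares a different calling convention is undefined behaviour, which is why the definitions and the corresponding pointer typedefs have to agree. A standalone illustration (MY_CALL stands in for GGML_CALL and expands to nothing here; the names are invented for the example):

// Standalone sketch, not ggml code: the function-pointer type and the function
// assigned to it must declare the same calling convention.
#include <stddef.h>
#include <stdio.h>

#define MY_CALL /* stand-in for GGML_CALL; empty so this builds anywhere */

typedef MY_CALL size_t (*get_alignment_fn)(void * ctx);

MY_CALL static size_t my_get_alignment(void * ctx) {
    (void) ctx;
    return 64;  // mirrors TENSOR_ALIGNMENT in the diff above
}

int main(void) {
    get_alignment_fn fn = my_get_alignment;  // valid because the conventions match
    printf("alignment: %zu\n", fn(NULL));
    return 0;
}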
@@ -544,20 +544,20 @@ struct ggml_backend_cpu_context {
     size_t work_size;
 };
 
-static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
+GGML_CALL static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
     return "CPU";
 
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_free(ggml_backend_t backend) {
+GGML_CALL static void ggml_backend_cpu_free(ggml_backend_t backend) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
     free(cpu_ctx->work_data);
     free(cpu_ctx);
     free(backend);
 }
 
-static ggml_backend_buffer_type_t ggml_backend_cpu_get_default_buffer_type(ggml_backend_t backend) {
+GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cpu_get_default_buffer_type(ggml_backend_t backend) {
     return ggml_backend_cpu_buffer_type();
 
     GGML_UNUSED(backend);
@@ -568,7 +568,7 @@ struct ggml_backend_plan_cpu {
     struct ggml_cgraph cgraph;
 };
 
-static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
@@ -583,7 +583,7 @@ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend
     return cpu_plan;
 }
 
-static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_CALL static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
 
     free(cpu_plan->cplan.work_data);
@@ -592,15 +592,15 @@ static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backen
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_CALL static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
 
     ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
 
     GGML_UNUSED(backend);
 }
 
-static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -617,7 +617,7 @@ static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     return true;
 }
 
-static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+GGML_CALL static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_MUL_MAT:
             return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
@@ -660,7 +660,7 @@ ggml_backend_t ggml_backend_cpu_init(void) {
     return cpu_backend;
 }
 
-bool ggml_backend_is_cpu(ggml_backend_t backend) {
+GGML_CALL bool ggml_backend_is_cpu(ggml_backend_t backend) {
     return backend->iface.get_name == ggml_backend_cpu_name;
 }
 
@@ -671,11 +671,11 @@ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
     ctx->n_threads = n_threads;
 }
 
-ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) {
+GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) {
     return ggml_backend_buffer_init(ggml_backend_cpu_buffer_type(), cpu_backend_buffer_i_from_ptr, ptr, size);
 }
 
-static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data) {
+GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data) {
    return ggml_backend_cpu_init();

    GGML_UNUSED(params);
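For completeness, the change is transparent to callers of the public API: user code calls these functions the same way as before. A minimal caller-side sketch, assuming the ggml-backend.h header from this tree is on the include path:

// Caller-side sketch (not part of this diff): the CPU backend is used exactly
// as before; GGML_CALL only fixes the functions' declared calling convention.
#include <stdio.h>
#include "ggml-backend.h"

int main(void) {
    ggml_backend_t backend = ggml_backend_cpu_init();
    if (backend == NULL) {
        fprintf(stderr, "failed to init CPU backend\n");
        return 1;
    }
    ggml_backend_cpu_set_n_threads(backend, 4);
    printf("backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}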