Skip to content

Commit aa7a2c4

Browse files
committed
sync : ggml (backend v2) (wip)
1 parent 4ff1046 commit aa7a2c4

File tree

9 files changed

+1870
-769
lines changed

9 files changed

+1870
-769
lines changed

ggml-alloc.c

Lines changed: 379 additions & 207 deletions
Large diffs are not rendered by default.

ggml-alloc.h

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,79 @@
66
extern "C" {
77
#endif
88

9+
struct ggml_backend;
910
struct ggml_backend_buffer;
1011

11-
GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
12-
GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
13-
GGML_API struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer);
12+
//
13+
// Legacy API
14+
//
15+
16+
typedef struct ggml_allocr * ggml_allocr_t;
17+
18+
// initialize allocator for use with CPU backend only
19+
GGML_API ggml_allocr_t ggml_allocr_new(void * data, size_t size, size_t alignment);
20+
GGML_API ggml_allocr_t ggml_allocr_new_measure(size_t alignment);
21+
22+
// initialize allocator for use with ggml-backend
23+
GGML_API ggml_allocr_t ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer);
24+
GGML_API ggml_allocr_t ggml_allocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer
25+
GGML_API ggml_allocr_t ggml_allocr_new_measure_from_backend(struct ggml_backend * backend);
26+
27+
GGML_API struct ggml_backend_buffer * ggml_allocr_get_buffer(ggml_allocr_t alloc);
1428

1529
// tell the allocator to parse nodes following the order described in the list
1630
// you should call this if your graph is optimized to execute out-of-order
17-
GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);
18-
19-
GGML_API void ggml_allocr_free (struct ggml_allocr * alloc);
20-
GGML_API bool ggml_allocr_is_measure (struct ggml_allocr * alloc);
21-
GGML_API void ggml_allocr_reset (struct ggml_allocr * alloc);
22-
GGML_API void ggml_allocr_alloc (struct ggml_allocr * alloc, struct ggml_tensor * tensor);
23-
GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
24-
GGML_API size_t ggml_allocr_max_size (struct ggml_allocr * alloc);
25-
26-
GGML_API size_t ggml_allocr_alloc_graph_n(
27-
struct ggml_allocr * alloc,
28-
struct ggml_cgraph ** graphs, int n_graphs,
29-
struct ggml_tensor *** inputs, struct ggml_tensor *** outputs);
31+
GGML_API void ggml_allocr_set_parse_seq(ggml_allocr_t alloc, const int * list, int n);
32+
33+
GGML_API void ggml_allocr_free (ggml_allocr_t alloc);
34+
GGML_API bool ggml_allocr_is_measure (ggml_allocr_t alloc);
35+
GGML_API void ggml_allocr_reset (ggml_allocr_t alloc);
36+
GGML_API void ggml_allocr_alloc (ggml_allocr_t alloc, struct ggml_tensor * tensor);
37+
GGML_API size_t ggml_allocr_max_size (ggml_allocr_t alloc);
38+
39+
GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph);
40+
41+
//
42+
// ggml-backend v2 API
43+
//
44+
45+
// Separate tensor and graph allocator objects
46+
// This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
47+
// The original API is kept as a wrapper around the new API
48+
49+
// Tensor allocator
50+
typedef struct ggml_tallocr * ggml_tallocr_t;
51+
52+
GGML_API ggml_tallocr_t ggml_tallocr_new(void * data, size_t size, size_t alignment);
53+
GGML_API ggml_tallocr_t ggml_tallocr_new_measure(size_t alignment);
54+
GGML_API ggml_tallocr_t ggml_tallocr_new_from_buffer(struct ggml_backend_buffer * buffer);
55+
GGML_API ggml_tallocr_t ggml_tallocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer
56+
GGML_API ggml_tallocr_t ggml_tallocr_new_measure_from_backend(struct ggml_backend * backend);
57+
58+
GGML_API struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t talloc);
59+
60+
GGML_API void ggml_tallocr_free (ggml_tallocr_t talloc);
61+
GGML_API bool ggml_tallocr_is_measure (ggml_tallocr_t talloc);
62+
GGML_API void ggml_tallocr_reset (ggml_tallocr_t talloc);
63+
GGML_API void ggml_tallocr_alloc (ggml_tallocr_t talloc, struct ggml_tensor * tensor);
64+
GGML_API size_t ggml_tallocr_max_size (ggml_tallocr_t talloc);
65+
66+
67+
// Graph allocator
68+
typedef struct ggml_gallocr * ggml_gallocr_t;
69+
70+
GGML_API ggml_gallocr_t ggml_gallocr_new(void);
71+
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
72+
73+
GGML_API void ggml_gallocr_set_parse_seq(ggml_gallocr_t galloc, const int * list, int n);
74+
GGML_API size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, struct ggml_cgraph * graph);
75+
76+
// Allocate tensors from the allocators given by the hash table
77+
GGML_API void ggml_gallocr_alloc_graph_n(
78+
ggml_gallocr_t galloc,
79+
struct ggml_cgraph * graph,
80+
struct ggml_hash_set hash_set,
81+
ggml_tallocr_t * hash_node_talloc);
3082

3183
#ifdef __cplusplus
3284
}

ggml-backend-impl.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#pragma once
2+
3+
// ggml-backend internal header
4+
5+
#include "ggml-backend.h"
6+
7+
#ifdef __cplusplus
8+
extern "C" {
9+
#endif
10+
11+
//
12+
// Backend buffer
13+
//
14+
15+
typedef void * ggml_backend_buffer_context_t;
16+
17+
struct ggml_backend_buffer_i {
18+
void (*free_buffer) (ggml_backend_buffer_t buffer);
19+
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
20+
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
21+
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
22+
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
23+
};
24+
25+
struct ggml_backend_buffer {
26+
struct ggml_backend_buffer_i iface;
27+
28+
ggml_backend_t backend;
29+
ggml_backend_buffer_context_t context;
30+
31+
size_t size;
32+
};
33+
34+
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
35+
struct ggml_backend * backend,
36+
struct ggml_backend_buffer_i iface,
37+
ggml_backend_buffer_context_t context,
38+
size_t size);
39+
40+
//
41+
// Backend
42+
//
43+
44+
typedef void * ggml_backend_context_t;
45+
46+
struct ggml_backend_i {
47+
const char * (*get_name)(ggml_backend_t backend);
48+
49+
void (*free)(ggml_backend_t backend);
50+
51+
// buffer allocation
52+
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
53+
54+
// get buffer alignment
55+
size_t (*get_alignment)(ggml_backend_t backend);
56+
57+
// tensor data access
58+
// these functions can be asynchronous; helper functions that automatically call synchronize are provided for synchronous access
59+
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
60+
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
61+
void (*synchronize) (ggml_backend_t backend);
62+
63+
// (optional) copy tensor between different backends, allows for single-copy transfers
64+
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
65+
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
66+
67+
// compute graph with a plan
68+
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
69+
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
70+
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
71+
72+
// compute graph without a plan
73+
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
74+
75+
// check if the backend supports an operation
76+
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
77+
};
78+
79+
struct ggml_backend {
80+
struct ggml_backend_i iface;
81+
82+
ggml_backend_context_t context;
83+
};
84+
85+
#ifdef __cplusplus
86+
}
87+
#endif

0 commit comments

Comments
 (0)