Skip to content

Commit 6ff0e7a

Browse files
committed
add device props/caps, fully support async upload for all compatible backends
1 parent 805fea9 commit 6ff0e7a

File tree

6 files changed

+219
-91
lines changed

6 files changed

+219
-91
lines changed

ggml/include/ggml-backend.h

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ extern "C" {
2020
// Backend buffer type
2121
//
2222

23-
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
24-
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
25-
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
26-
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
27-
GGML_API size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
28-
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
29-
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
23+
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
24+
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
25+
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
26+
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
27+
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
28+
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
29+
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
3030

3131
//
3232
// Backend buffer
@@ -53,6 +53,9 @@ extern "C" {
5353
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
5454
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
5555

56+
// tensor copy between different backends
57+
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
58+
5659
//
5760
// Backend (stream)
5861
//
@@ -88,49 +91,70 @@ extern "C" {
8891
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
8992
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
9093

91-
// tensor copy between different backends
92-
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
93-
9494
// asynchronous copy
9595
// the copy is performed after all the currently queued operations in backend_src
9696
// backend_dst will wait for the copy to complete before performing other operations
9797
// automatic fallback to sync copy if async is not supported
9898
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
9999

100-
// events
101-
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_dev_t device);
102-
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
103-
GGML_API void ggml_backend_event_record (ggml_backend_event_t event, ggml_backend_t backend);
100+
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
101+
102+
//
103+
// Events
104+
//
105+
106+
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
107+
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
108+
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
104109
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
105-
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
110+
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
106111

107112
//
108113
// Backend device
109114
//
110115

111-
enum ggml_backend_device_type {
116+
enum ggml_backend_dev_type {
112117
GGML_BACKEND_DEVICE_TYPE_CPU,
113118
GGML_BACKEND_DEVICE_TYPE_GPU,
114-
// devices with full capabilities (excludes backends such as BLAS)
119+
// devices with full capabilities (excludes backends such as BLAS that only support matrix multiplication)
115120
GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
116121
GGML_BACKEND_DEVICE_TYPE_GPU_FULL
117122
};
118123

124+
// functionality supported by the device
125+
struct ggml_backend_dev_caps {
126+
// true if the device supports asynchronous operations
127+
bool async;
128+
// true if the device provides a pinned host buffer
129+
bool host_buffer;
130+
// true if the device supports event synchronization
131+
bool events;
132+
};
133+
134+
// all the device properties
135+
// Aggregate of the device properties, filled in by ggml_backend_dev_get_props.
// The same values are also exposed individually through ggml_backend_dev_name,
// ggml_backend_dev_description, ggml_backend_dev_memory and ggml_backend_dev_type.
struct ggml_backend_dev_props {
136+
const char * name; // device name
137+
const char * description; // device description
138+
size_t memory_free; // free device memory
139+
size_t memory_total; // total device memory
140+
enum ggml_backend_dev_type type; // device type (CPU/GPU, optionally "full")
141+
struct ggml_backend_dev_caps caps; // functionality supported by the device
142+
};
143+
119144
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
120145
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
121146
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
122-
GGML_API enum ggml_backend_device_type ggml_backend_dev_type(ggml_backend_dev_t device);
147+
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
148+
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
123149
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
124150
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
125151
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
126152
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
127153
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
128154

129-
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
130-
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
131-
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
132-
133-
GGML_API ggml_backend_event_t ggml_backend_dev_event_new(ggml_backend_dev_t device);
155+
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
156+
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
157+
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
134158

135159
//
136160
// Backend (reg)
@@ -158,16 +182,16 @@ extern "C" {
158182
GGML_API size_t ggml_backend_dev_count(void);
159183
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
160184
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
161-
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_device_type type);
185+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
162186

163187
// Set the log callback for all registered backends
164188
GGML_API void ggml_backend_set_log_callback(ggml_log_callback log_callback, void * user_data);
165189

166-
// Direct Backend (stream) initialization
190+
// Direct backend (stream) initialization
167191
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
168192
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
169193
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
170-
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_device_type type, const char * params);
194+
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
171195
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU_FULL) OR ggml_backend_dev_by_type(CPU_FULL), NULL)
172196
GGML_API ggml_backend_t ggml_backend_init_best(void);
173197

@@ -276,7 +300,6 @@ extern "C" {
276300
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
277301
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
278302

279-
280303
//
281304
// CPU backend
282305
//

ggml/src/ggml-backend-impl.h

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@ extern "C" {
99
#endif
1010

1111
//
12-
// Backend buffer
12+
// Backend buffer type
1313
//
1414

15-
// buffer type
1615
struct ggml_backend_buffer_type_i {
1716
const char * (*get_name) (ggml_backend_buffer_type_t buft);
1817
// allocate a buffer of this type
@@ -33,7 +32,10 @@ extern "C" {
3332
void * context;
3433
};
3534

36-
// buffer
35+
//
36+
// Backend buffer
37+
//
38+
3739
struct ggml_backend_buffer_i {
3840
const char * (*get_name) (ggml_backend_buffer_t buffer);
3941
// (optional) free the buffer
@@ -143,15 +145,26 @@ extern "C" {
143145
};
144146

145147
//
146-
// Backend registry v2
148+
// Backend device
147149
//
148150

151+
// Note: if additional properties are needed, we should add a struct with all of them
152+
// the current functions to obtain the properties can remain, since they are more convenient for frequently used properties
149153
struct ggml_backend_device_i {
150-
// device properties
154+
// device name: short identifier for this device, such as "CPU" or "CUDA0"
151155
const char * (*get_name)(ggml_backend_dev_t dev);
156+
157+
// device description: short informative description of the device, could be the model name
152158
const char * (*get_description)(ggml_backend_dev_t dev);
159+
160+
// device memory in bytes
153161
void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);
154-
enum ggml_backend_device_type (*get_type)(ggml_backend_dev_t dev);
162+
163+
// device type
164+
enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);
165+
166+
// device properties
167+
void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);
155168

156169
// get the backend (reg) associated with this device
157170
ggml_backend_reg_t (*get_backend_reg)(ggml_backend_dev_t dev);
@@ -190,6 +203,10 @@ extern "C" {
190203
void * context;
191204
};
192205

206+
//
207+
// Backend (reg)
208+
//
209+
193210
struct ggml_backend_reg_i {
194211
const char * (*get_name)(ggml_backend_reg_t reg);
195212

@@ -212,7 +229,7 @@ extern "C" {
212229
};
213230

214231

215-
// Internal API
232+
// Internal backend registry API
216233
void ggml_backend_register(ggml_backend_reg_t reg);
217234
void ggml_backend_device_register(ggml_backend_dev_t device);
218235
// TODO: backends can be loaded as a dynamic library, in which case the library needs to export this function

ggml/src/ggml-backend.cpp

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,10 @@ bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor *
331331
return false;
332332
}
333333

334+
// Returns the device stored in the backend's `device` field.
// The backend pointer is dereferenced without a NULL check.
ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {
335+
return backend->device;
336+
}
337+
334338
// backend copy
335339

336340
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
@@ -440,10 +444,14 @@ void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t *
440444
device->iface.get_memory(device, free, total);
441445
}
442446

443-
enum ggml_backend_device_type ggml_backend_dev_type(ggml_backend_dev_t device) {
447+
enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
444448
return device->iface.get_type(device);
445449
}
446450

451+
// Fills *props with the properties of the device by forwarding to the
// device interface's get_props callback.
// The callback is invoked unconditionally: it is not checked for NULL here,
// and props is passed through as-is.
void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) {
452+
device->iface.get_props(device, props);
453+
}
454+
447455
ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) {
448456
return device->iface.get_backend_reg(device);
449457
}
@@ -476,13 +484,6 @@ bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_te
476484
return device->iface.offload_op(device, op);
477485
}
478486

479-
ggml_backend_event_t ggml_backend_dev_event_new(ggml_backend_dev_t device) {
480-
if (!device->iface.event_new) {
481-
return NULL;
482-
}
483-
return device->iface.event_new(device);
484-
}
485-
486487
// Backend (reg)
487488

488489
const char * ggml_backend_reg_name(ggml_backend_reg_t reg) {
@@ -603,7 +604,7 @@ ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
603604
return NULL;
604605
}
605606

606-
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_device_type type) {
607+
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
607608
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
608609
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
609610
if (ggml_backend_dev_type(dev) == type) {
@@ -629,7 +630,7 @@ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params)
629630
return ggml_backend_dev_init(dev, params);
630631
}
631632

632-
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_device_type type, const char * params) {
633+
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
633634
ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
634635
if (!dev) {
635636
return NULL;
@@ -1028,60 +1029,72 @@ static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user
10281029

10291030
////////////////////////
10301031

1031-
static const char * ggml_backend_cpu_device_name(ggml_backend_dev_t device) {
1032+
static const char * ggml_backend_cpu_device_name(ggml_backend_dev_t dev) {
10321033
return "CPU";
10331034

1034-
GGML_UNUSED(device);
1035+
GGML_UNUSED(dev);
10351036
}
10361037

1037-
static const char * ggml_backend_cpu_device_description(ggml_backend_dev_t device) {
1038+
static const char * ggml_backend_cpu_device_description(ggml_backend_dev_t dev) {
10381039
// TODO
10391040
return "CPU";
10401041

1041-
GGML_UNUSED(device);
1042+
GGML_UNUSED(dev);
10421043
}
10431044

1044-
static void ggml_backend_cpu_device_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
1045+
static void ggml_backend_cpu_device_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
10451046
// TODO
10461047
*free = 0;
10471048
*total = 0;
10481049

1049-
GGML_UNUSED(device);
1050+
GGML_UNUSED(dev);
10501051
}
10511052

1052-
static enum ggml_backend_device_type ggml_backend_cpu_device_type(ggml_backend_dev_t device) {
1053+
static enum ggml_backend_dev_type ggml_backend_cpu_device_type(ggml_backend_dev_t dev) {
10531054
return GGML_BACKEND_DEVICE_TYPE_CPU_FULL;
10541055

1055-
GGML_UNUSED(device);
1056+
GGML_UNUSED(dev);
1057+
}
1058+
1059+
// get_props implementation for the CPU device: fills every field of *props by
// delegating to the individual CPU device getters in this file, and reports
// all capability flags as false.
static void ggml_backend_cpu_device_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
1060+
props->name = ggml_backend_cpu_device_name(dev);
1061+
props->description = ggml_backend_cpu_device_description(dev);
1062+
props->type = ggml_backend_cpu_device_type(dev);
1063+
// ggml_backend_cpu_device_memory currently writes 0 to both values (marked TODO there)
ggml_backend_cpu_device_memory(dev, &props->memory_free, &props->memory_total);
1064+
// positional initializer: the order must match the field order of struct ggml_backend_dev_caps
props->caps = {
1065+
/* async */ false,
1066+
/* host_buffer */ false,
1067+
/* events */ false,
1068+
};
10561069
}
10571070

1058-
static ggml_backend_reg_t ggml_backend_cpu_device_reg(ggml_backend_dev_t device) {
1071+
static ggml_backend_reg_t ggml_backend_cpu_device_reg(ggml_backend_dev_t dev) {
10591072
return ggml_backend_cpu_reg();
10601073

1061-
GGML_UNUSED(device);
1074+
GGML_UNUSED(dev);
10621075
}
10631076

1064-
static ggml_backend_t ggml_backend_cpu_device_init(ggml_backend_dev_t device, const char * params) {
1077+
static ggml_backend_t ggml_backend_cpu_device_init(ggml_backend_dev_t dev, const char * params) {
10651078
return ggml_backend_cpu_init();
10661079

1067-
GGML_UNUSED(device);
1080+
GGML_UNUSED(dev);
10681081
GGML_UNUSED(params);
10691082
}
10701083

1071-
static ggml_backend_buffer_type_t ggml_backend_cpu_device_buffer_type(ggml_backend_dev_t device) {
1084+
static ggml_backend_buffer_type_t ggml_backend_cpu_device_buffer_type(ggml_backend_dev_t dev) {
10721085
return ggml_backend_cpu_buffer_type();
10731086

1074-
GGML_UNUSED(device);
1087+
GGML_UNUSED(dev);
10751088
}
10761089

1077-
static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size) {
1090+
static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
10781091
return ggml_backend_cpu_buffer_from_ptr(ptr, size);
10791092

1080-
GGML_UNUSED(device);
1093+
GGML_UNUSED(dev);
10811094
GGML_UNUSED(max_tensor_size);
10821095
}
10831096

1084-
static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
1097+
static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
10851098
switch (op->op) {
10861099
case GGML_OP_CPY:
10871100
return
@@ -1101,20 +1114,21 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t device, const
11011114
return true;
11021115
}
11031116

1104-
GGML_UNUSED(device);
1117+
GGML_UNUSED(dev);
11051118
}
11061119

1107-
static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) {
1120+
static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
11081121
return ggml_backend_buft_is_host(buft);
11091122

1110-
GGML_UNUSED(device);
1123+
GGML_UNUSED(dev);
11111124
}
11121125

11131126
struct ggml_backend_device_i ggml_backend_cpu_device_i = {
11141127
/* .get_name = */ ggml_backend_cpu_device_name,
11151128
/* .get_description = */ ggml_backend_cpu_device_description,
11161129
/* .get_memory = */ ggml_backend_cpu_device_memory,
11171130
/* .get_type = */ ggml_backend_cpu_device_type,
1131+
/* .get_props = */ ggml_backend_cpu_device_props,
11181132
/* .get_backend_reg = */ ggml_backend_cpu_device_reg,
11191133
/* .init_backend = */ ggml_backend_cpu_device_init,
11201134
/* .buffer_type = */ ggml_backend_cpu_device_buffer_type,

0 commit comments

Comments
 (0)