
Commit 8e564be

kompute: add backend registry / device interfaces
Get in line with the other backends by supporting the newer backend/device registry interfaces.

Signed-off-by: Sergio Lopez <[email protected]>
1 parent 2f8bd2b commit 8e564be
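
For context, this is roughly what the backend/device registry interfaces buy callers: once a backend registers itself, its devices can be enumerated and initialized through the generic ggml-backend API instead of backend-specific entry points. A minimal caller-side sketch (not part of this commit), assuming the ggml_backend_dev_count/ggml_backend_dev_get/ggml_backend_dev_name/ggml_backend_dev_description helpers declared in ggml-backend.h:

    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
        // every registered backend contributes its devices to this global list
        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            printf("device %zu: %s - %s\n", i,
                   ggml_backend_dev_name(dev),          // e.g. "Kompute0" (see ggml-kompute.cpp below)
                   ggml_backend_dev_description(dev));  // the Vulkan device name
        }
        return 0;
    }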

4 files changed: +210 additions, -98 deletions

ggml/include/ggml-kompute.h

Lines changed: 4 additions & 0 deletions
@@ -11,6 +11,8 @@
 extern "C" {
 #endif

+#define GGML_KOMPUTE_MAX_DEVICES 16
+
 struct ggml_vk_device {
     int index;
     int type; // same as VkPhysicalDeviceType
@@ -41,6 +43,8 @@ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);

 GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);

+GGML_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
+
 #ifdef __cplusplus
 }
 #endif
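
The header now exposes ggml_backend_kompute_reg() alongside the existing ggml_backend_kompute_buffer_type() entry point, and GGML_KOMPUTE_MAX_DEVICES bounds the per-device buffer-type table defined later in ggml-kompute.cpp. A hedged sketch of driving the registry object directly, assuming a build with GGML_USE_KOMPUTE and the generic ggml_backend_reg_*/ggml_backend_dev_* helpers from ggml-backend.h:

    ggml_backend_reg_t reg = ggml_backend_kompute_reg();
    printf("%s: %zu device(s)\n",
           ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));

    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        size_t free, total;
        ggml_backend_dev_memory(dev, &free, &total);  // both values come from heapSize here
        printf("  %s: %zu MiB total\n", ggml_backend_dev_name(dev), total / (1024 * 1024));
    }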

ggml/src/ggml-backend.cpp

Lines changed: 7 additions & 2 deletions
@@ -565,6 +565,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
 #include "ggml-cann.h"
 #endif

+#ifdef GGML_USE_KOMPUTE
+#include "ggml-kompute.h"
+#endif
+
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_t> backends;
     std::vector<ggml_backend_dev_t> devices;
@@ -594,8 +598,9 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_CANN
         register_backend(ggml_backend_cann_reg());
 #endif
-
-        // TODO: kompute
+#ifdef GGML_USE_KOMPUTE
+        register_backend(ggml_backend_kompute_reg());
+#endif

         register_backend(ggml_backend_cpu_reg());
     }
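
Registration stays compile-time conditional, so whether a given binary carries the Kompute backend can be probed at runtime through the registry. A small sketch, assuming ggml_backend_reg_by_name() from the same registry API (the name matches the string returned by ggml_backend_kompute_reg_get_name() in ggml-kompute.cpp below):

    ggml_backend_reg_t kompute = ggml_backend_reg_by_name("Kompute");
    if (kompute == nullptr) {
        // built without GGML_USE_KOMPUTE: nothing was registered
        fprintf(stderr, "Kompute backend not available in this build\n");
    }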

ggml/src/ggml-kompute.cpp

Lines changed: 199 additions & 54 deletions
@@ -42,6 +42,7 @@
 #include <cstring>
 #include <iostream>
 #include <memory>
+#include <mutex>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
     return results;
 }

-// public API returns a C-style array
-ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
-    auto devices = ggml_vk_available_devices_internal(memoryRequired);
-    *count = devices.size();
-    if (devices.empty()) {
-        return nullptr;
-    }
-
-    size_t nbytes = sizeof (ggml_vk_device) * (devices.size());
-    auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
-    memcpy(arr, devices.data(), nbytes);
-    return arr;
+static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
+    static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
+    return devices;
 }

 static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
     if (!komputeManager()->hasDevice())
         return ggml_vk_device();

-    auto devices = ggml_vk_available_devices_internal(0);
+    auto devices = ggml_vk_available_devices();
     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
     GGML_ASSERT(!devices.empty());
     return devices.front();
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
 }

-static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
-    switch (op->type) {
-        case GGML_TYPE_F16:
-        case GGML_TYPE_F32:
-        case GGML_TYPE_Q4_0:
-        case GGML_TYPE_Q4_1:
-            break;
-        default:
-            return false;
-    }
-
+static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
             ;
     }
     return false;
+
+    GGML_UNUSED(dev);
 }

 static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
@@ -1458,10 +1442,12 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml

         any_commands_recorded = true;

+        /* Do we still need this?
         if (!ggml_vk_supports_op(dst)) {
             fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
             GGML_ABORT("unsupported op");
         }
+        */

         const int32_t ne00 = src0 ? src0->ne[0] : 0;
         const int32_t ne01 = src0 ? src0->ne[1] : 0;
@@ -1913,25 +1899,30 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
 };

 ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
-    static std::vector<ggml_backend_buffer_type> bufts = []() {
-        std::vector<ggml_backend_buffer_type> vec;
-        auto devices = ggml_vk_available_devices_internal(0);
-        vec.reserve(devices.size());
-
-        for (const auto & dev : devices) {
-            vec.push_back({
-                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
-                /* .device  = */ nullptr,
-                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
-            });
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
+
+    static ggml_backend_buffer_type
+        ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];
+
+    static bool ggml_backend_kompute_buffer_type_initialized = false;
+
+    if (!ggml_backend_kompute_buffer_type_initialized) {
+        for (int32_t i = 0; i < devices.size(); i++) {
+            ggml_backend_kompute_buffer_types[i] = {
+                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
+                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
+                /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
+            };
         }
-        return vec;
-    }();
+        ggml_backend_kompute_buffer_type_initialized = true;
+    }

-    auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) {
-        return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
-    });
-    return it < bufts.end() ? &*it : nullptr;
+    return &ggml_backend_kompute_buffer_types[device];
 }

 // backend
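
ggml_backend_kompute_buffer_type() now indexes a fixed table of at most GGML_KOMPUTE_MAX_DEVICES entries, each wired to its registry device, and asserts on an out-of-range index where the old lookup returned nullptr. A usage sketch, assuming the generic buffer-type helpers from ggml-backend.h (the device index and size here are arbitrary):

    ggml_backend_buffer_type_t buft = ggml_backend_kompute_buffer_type(0);              // device 0
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 16 * 1024 * 1024); // 16 MiB
    GGML_ASSERT(buf != nullptr);
    // ... allocate tensors in the buffer, run graphs ...
    ggml_backend_buffer_free(buf);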
@@ -1964,16 +1955,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st
     return GGML_STATUS_SUCCESS;
 }

-static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    GGML_UNUSED(backend);
-    return ggml_vk_supports_op(op);
-}
-
-static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    GGML_UNUSED(backend);
-    return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
-}
-
 static struct ggml_backend_i kompute_backend_i = {
     /* .get_name                = */ ggml_backend_kompute_name,
     /* .free                    = */ ggml_backend_kompute_free,
@@ -1987,8 +1968,8 @@ static struct ggml_backend_i kompute_backend_i = {
     /* .graph_plan_update       = */ NULL,
     /* .graph_plan_compute      = */ NULL,
     /* .graph_compute           = */ ggml_backend_kompute_graph_compute,
-    /* .supports_op             = */ ggml_backend_kompute_supports_op,
-    /* .supports_buft           = */ ggml_backend_kompute_supports_buft,
+    /* .supports_op             = */ NULL,
+    /* .supports_buft           = */ NULL,
     /* .offload_op              = */ NULL,
     /* .event_record            = */ NULL,
     /* .event_wait              = */ NULL,
@@ -2006,7 +1987,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
     ggml_backend_t kompute_backend = new ggml_backend {
         /* .guid      = */ ggml_backend_kompute_guid(),
         /* .interface = */ kompute_backend_i,
-        /* .device    = */ nullptr,
+        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
         /* .context   = */ s_kompute_context,
     };

@@ -2016,3 +1997,167 @@
 bool ggml_backend_is_kompute(ggml_backend_t backend) {
     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
 }
+
+static size_t ggml_backend_kompute_get_device_count() {
+    auto devices = ggml_vk_available_devices();
+    return devices.size();
+}
+
+static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    snprintf(description, description_size, "%s", devices[device].name);
+}
+
+static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    *total = devices[device].heapSize;
+    *free = devices[device].heapSize;
+}
+
+//////////////////////////
+
+struct ggml_backend_kompute_device_context {
+    int device;
+    std::string name;
+    std::string description;
+};
+
+static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->name.c_str();
+}
+
+static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->description.c_str();
+}
+
+static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_get_device_memory(ctx->device, free, total);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_buffer_type(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) {
+        return false;
+    }
+
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context;
+
+    return buft_ctx->device == ctx->device;
+}
+
+static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
+}
+
+static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name        = ggml_backend_kompute_device_get_name(dev);
+    props->description = ggml_backend_kompute_device_get_description(dev);
+    props->type        = ggml_backend_kompute_device_get_type(dev);
+    ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* async                 = */ false,
+        /* host_buffer           = */ false,
+        /* .buffer_from_host_ptr = */ false,
+        /* events                = */ false,
+    };
+}
+
+static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
+    GGML_UNUSED(params);
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_init(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
+    const int min_batch_size = 32;
+
+    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
+           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
+
+    GGML_UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
+    /* .get_name             = */ ggml_backend_kompute_device_get_name,
+    /* .get_description      = */ ggml_backend_kompute_device_get_description,
+    /* .get_memory           = */ ggml_backend_kompute_device_get_memory,
+    /* .get_type             = */ ggml_backend_kompute_device_get_type,
+    /* .get_props            = */ ggml_backend_kompute_device_get_props,
+    /* .init_backend         = */ ggml_backend_kompute_device_init,
+    /* .get_buffer_type      = */ ggml_backend_kompute_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ NULL,
+    /* .supports_op          = */ ggml_backend_kompute_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_kompute_device_supports_buft,
+    /* .offload_op           = */ ggml_backend_kompute_device_offload_op,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+
+static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return "Kompute";
+}
+
+static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return ggml_backend_kompute_get_device_count();
+}
+
+static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+    static std::vector<ggml_backend_dev_t> devices;
+
+    static bool initialized = false;
+
+    {
+        static std::mutex mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+        if (!initialized) {
+            for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) {
+                ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
+                char desc[256];
+                ggml_backend_kompute_get_device_description(i, desc, sizeof(desc));
+                ctx->device = i;
+                ctx->name = "Kompute" + std::to_string(i);
+                ctx->description = desc;
+                devices.push_back(new ggml_backend_device {
+                    /* .iface   = */ ggml_backend_kompute_device_i,
+                    /* .reg     = */ reg,
+                    /* .context = */ ctx,
+                });
+            }
+            initialized = true;
+        }
+    }
+
+    GGML_ASSERT(device < devices.size());
+    return devices[device];
+}
+
+static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
+    /* .get_name         = */ ggml_backend_kompute_reg_get_name,
+    /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_kompute_reg_get_device,
+    /* .get_proc_address = */ NULL,
+};
+
+ggml_backend_reg_t ggml_backend_kompute_reg() {
+    static ggml_backend_reg reg = {
+        /* .iface   = */ ggml_backend_kompute_reg_i,
+        /* .context = */ nullptr,
+    };
+
+    return &reg;
+}
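
With the device objects above, a backend instance can also be created through the device interface rather than by calling ggml_backend_kompute_init() directly; ggml_backend_kompute_device_init() simply forwards the stored device index. A short sketch, assuming ggml_backend_dev_get_props()/ggml_backend_dev_init() from ggml-backend.h:

    ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), 0);

    struct ggml_backend_dev_props props;
    ggml_backend_dev_get_props(dev, &props);  // name, description, memory, caps

    ggml_backend_t backend = ggml_backend_dev_init(dev, /* params = */ nullptr);
    GGML_ASSERT(backend != nullptr);
    // ... build and compute ggml graphs with this backend ...
    ggml_backend_free(backend);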
