
Commit bb36c4b

kompute: add backend registry / device interfaces

Get in line with the other backends by supporting the newer backend/device registry interfaces.

Signed-off-by: Sergio Lopez <[email protected]>

1 parent b9e02e8 · commit bb36c4b
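With the registry wired up, a Kompute device can be discovered and initialized through the generic ggml-backend API instead of the backend-specific entry points. A minimal sketch of that flow (not part of this commit; it assumes the usual helpers declared in ggml-backend.h such as ggml_backend_reg_dev_count, ggml_backend_dev_memory and ggml_backend_dev_init, and a build with GGML_USE_KOMPUTE):

// sketch: enumerate Kompute devices and create a backend via the new interfaces
#include "ggml-backend.h"
#include "ggml-kompute.h"

#include <cstdio>

int main() {
    ggml_backend_reg_t reg = ggml_backend_kompute_reg();

    size_t n_dev = ggml_backend_reg_dev_count(reg);
    for (size_t i = 0; i < n_dev; i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);

        size_t free, total;
        ggml_backend_dev_memory(dev, &free, &total);
        printf("%s: %s (%zu MiB)\n",
               ggml_backend_dev_name(dev),        // "Kompute0", "Kompute1", ...
               ggml_backend_dev_description(dev), // Vulkan device name
               total / 1024 / 1024);
    }

    if (n_dev > 0) {
        // equivalent to ggml_backend_kompute_init(0), but through the device interface
        ggml_backend_t backend = ggml_backend_dev_init(ggml_backend_reg_dev_get(reg, 0), nullptr);
        // ... build a graph and run ggml_backend_graph_compute(backend, graph) ...
        ggml_backend_free(backend);
    }
    return 0;
}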

File tree

3 files changed: +207 -59 lines

  ggml/include/ggml-kompute.h
  ggml/src/ggml-backend.cpp
  ggml/src/ggml-kompute.cpp

ggml/include/ggml-kompute.h

Lines changed: 4 additions & 0 deletions
@@ -11,6 +11,8 @@
 extern "C" {
 #endif
 
+#define GGML_KOMPUTE_MAX_DEVICES 16
+
 struct ggml_vk_device {
     int index;
     int type; // same as VkPhysicalDeviceType
@@ -41,6 +43,8 @@ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
 
 GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
 
+GGML_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
+
 #ifdef __cplusplus
 }
 #endif

ggml/src/ggml-backend.cpp

Lines changed: 7 additions & 2 deletions
@@ -562,6 +562,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name
 #include "ggml-cann.h"
 #endif
 
+#ifdef GGML_USE_KOMPUTE
+#include "ggml-kompute.h"
+#endif
+
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_t> backends;
     std::vector<ggml_backend_dev_t> devices;
@@ -591,8 +595,9 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_AMX
         register_backend(ggml_backend_amx_reg());
 #endif
-
-        // TODO: kompute
+#ifdef GGML_USE_KOMPUTE
+        register_backend(ggml_backend_kompute_reg());
+#endif
 
         register_backend(ggml_backend_cpu_reg());
     }
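For reference, register_backend() in the ggml_backend_registry constructor also registers every device the reg exposes, so the single call added above is enough to make each Vulkan device visible through the generic device API. Roughly, as a hedged sketch of the surrounding code (an approximation, not the verbatim implementation):

// approximate shape of ggml_backend_registry::register_backend(), which consumes
// the value returned by ggml_backend_kompute_reg()
void register_backend(ggml_backend_reg_t reg) {
    if (!reg) {
        return;
    }
    backends.push_back(reg); // the std::vector<ggml_backend_reg_t> member shown above
    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
        register_device(ggml_backend_reg_dev_get(reg, i)); // fills the `devices` vector
    }
}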

ggml/src/ggml-kompute.cpp

Lines changed: 196 additions & 57 deletions
@@ -42,6 +42,7 @@
 #include <cstring>
 #include <iostream>
 #include <memory>
+#include <mutex>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t memoryRequired) {
     return results;
 }
 
-// public API returns a C-style array
-ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
-    auto devices = ggml_vk_available_devices_internal(memoryRequired);
-    *count = devices.size();
-    if (devices.empty()) {
-        return nullptr;
-    }
-
-    size_t nbytes = sizeof (ggml_vk_device) * (devices.size());
-    auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
-    memcpy(arr, devices.data(), nbytes);
-    return arr;
+static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
+    static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
+    return devices;
 }
 
 static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
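The removed C-style ggml_vk_available_devices() allocated a fresh array on every call; the replacement caches the enumeration in a function-local static, so the Vulkan scan runs once (with memoryRequired = 0) and later callers reuse the result. C++11 guarantees that this initialization is thread-safe. A generic sketch of the pattern, with illustrative names only:

#include <vector>

struct device_info { int index; };

static std::vector<device_info> scan_devices() {
    // stand-in for the expensive Vulkan enumeration
    return { {0}, {1} };
}

static std::vector<device_info> & cached_devices() {
    // Initialized exactly once, on first use; concurrent first calls block until the
    // initializer finishes (C++11 "magic statics"), so no explicit mutex is needed here.
    static std::vector<device_info> devices = scan_devices();
    return devices;
}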
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
     if (!komputeManager()->hasDevice())
         return ggml_vk_device();
 
-    auto devices = ggml_vk_available_devices_internal(0);
+    auto devices = ggml_vk_available_devices();
     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
     GGML_ASSERT(!devices.empty());
     return devices.front();
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
 }
 
-static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
-    switch (op->type) {
-        case GGML_TYPE_F16:
-        case GGML_TYPE_F32:
-        case GGML_TYPE_Q4_0:
-        case GGML_TYPE_Q4_1:
-            break;
-        default:
-            return false;
-    }
-
+static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
             ;
     }
     return false;
+
+    GGML_UNUSED(dev);
 }
 
 static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
@@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
 
             any_commands_recorded = true;
 
-            if (!ggml_vk_supports_op(dst)) {
-                fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
-                GGML_ABORT("unsupported op");
-            }
-
             const int32_t ne00 = src0 ? src0->ne[0] : 0;
             const int32_t ne01 = src0 ? src0->ne[1] : 0;
             const int32_t ne02 = src0 ? src0->ne[2] : 0;
@@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
 };
 
 ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
-    static std::vector<ggml_backend_buffer_type> bufts = []() {
-        std::vector<ggml_backend_buffer_type> vec;
-        auto devices = ggml_vk_available_devices_internal(0);
-        vec.reserve(devices.size());
-
-        for (const auto & dev : devices) {
-            vec.push_back({
-                /* .iface = */ ggml_backend_kompute_buffer_type_interface,
-                /* .device = */ nullptr,
-                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
-            });
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
+    auto devices = ggml_vk_available_devices();
+    int32_t device_count = (int32_t) devices.size();
+    GGML_ASSERT(device < device_count);
+    GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
+
+    static ggml_backend_buffer_type
+        ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];
+
+    static bool ggml_backend_kompute_buffer_type_initialized = false;
+
+    if (!ggml_backend_kompute_buffer_type_initialized) {
+        for (int32_t i = 0; i < device_count; i++) {
+            ggml_backend_kompute_buffer_types[i] = {
+                /* .iface = */ ggml_backend_kompute_buffer_type_interface,
+                /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
+                /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
+            };
         }
-        return vec;
-    }();
+        ggml_backend_kompute_buffer_type_initialized = true;
+    }
 
-    auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) {
-        return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
-    });
-    return it < bufts.end() ? &*it : nullptr;
+    return &ggml_backend_kompute_buffer_types[device];
 }
 
 // backend
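Buffer types are now kept in a fixed static table indexed by device (bounded by GGML_KOMPUTE_MAX_DEVICES) and each entry is linked to its ggml_backend_dev_t, so callers can reach the same object through the device interface. A hedged sketch, assuming the generic ggml_backend_dev_buffer_type() accessor from ggml-backend.h; the helper name is hypothetical:

#include "ggml-backend.h"
#include "ggml-kompute.h"

// Pick the buffer type of Kompute device 0 through the device interface
// (illustrative helper, not part of the commit).
static ggml_backend_buffer_type_t kompute_buft_for_device0(void) {
    ggml_backend_reg_t reg = ggml_backend_kompute_reg();
    ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);
    return ggml_backend_dev_buffer_type(dev); // same object as ggml_backend_kompute_buffer_type(0)
}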
@@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     return GGML_STATUS_SUCCESS;
 }
 
-static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    GGML_UNUSED(backend);
-    return ggml_vk_supports_op(op);
-}
-
-static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    GGML_UNUSED(backend);
-    return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
-}
-
 static struct ggml_backend_i kompute_backend_i = {
     /* .get_name = */ ggml_backend_kompute_name,
     /* .free = */ ggml_backend_kompute_free,
@@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
     ggml_backend_t kompute_backend = new ggml_backend {
         /* .guid = */ ggml_backend_kompute_guid(),
         /* .interface = */ kompute_backend_i,
-        /* .device = */ nullptr,
+        /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
        /* .context = */ s_kompute_context,
     };

@@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
 bool ggml_backend_is_kompute(ggml_backend_t backend) {
     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
 }
+
+static size_t ggml_backend_kompute_get_device_count() {
+    auto devices = ggml_vk_available_devices();
+    return devices.size();
+}
+
+static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    snprintf(description, description_size, "%s", devices[device].name);
+}
+
+static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    *total = devices[device].heapSize;
+    *free = devices[device].heapSize;
+}
+
+//////////////////////////
+
+struct ggml_backend_kompute_device_context {
+    int device;
+    std::string name;
+    std::string description;
+};
+
+static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->name.c_str();
+}
+
+static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->description.c_str();
+}
+
+static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_get_device_memory(ctx->device, free, total);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_buffer_type(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) {
+        return false;
+    }
+
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context;
+
+    return buft_ctx->device == ctx->device;
+}
+
+static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return GGML_BACKEND_DEVICE_TYPE_GPU;
+}
+
+static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name = ggml_backend_kompute_device_get_name(dev);
+    props->description = ggml_backend_kompute_device_get_description(dev);
+    props->type = ggml_backend_kompute_device_get_type(dev);
+    ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* async = */ false,
+        /* host_buffer = */ false,
+        /* .buffer_from_host_ptr = */ false,
+        /* events = */ false,
+    };
+}
+
+static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
+    GGML_UNUSED(params);
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_init(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
+    const int min_batch_size = 32;
+
+    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
+           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
+
+    GGML_UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
+    /* .get_name = */ ggml_backend_kompute_device_get_name,
+    /* .get_description = */ ggml_backend_kompute_device_get_description,
+    /* .get_memory = */ ggml_backend_kompute_device_get_memory,
+    /* .get_type = */ ggml_backend_kompute_device_get_type,
+    /* .get_props = */ ggml_backend_kompute_device_get_props,
+    /* .init_backend = */ ggml_backend_kompute_device_init,
+    /* .get_buffer_type = */ ggml_backend_kompute_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ NULL,
+    /* .supports_op = */ ggml_backend_kompute_device_supports_op,
+    /* .supports_buft = */ ggml_backend_kompute_device_supports_buft,
+    /* .offload_op = */ ggml_backend_kompute_device_offload_op,
+    /* .event_new = */ NULL,
+    /* .event_free = */ NULL,
+    /* .event_synchronize = */ NULL,
+};
+
+static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return "Kompute";
+}
+
+static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return ggml_backend_kompute_get_device_count();
+}
+
+static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+    static std::vector<ggml_backend_dev_t> devices;
+
+    static bool initialized = false;
+
+    {
+        static std::mutex mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+        if (!initialized) {
+            for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) {
+                ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
+                char desc[256];
+                ggml_backend_kompute_get_device_description(i, desc, sizeof(desc));
+                ctx->device = i;
+                ctx->name = "Kompute" + std::to_string(i);
+                ctx->description = desc;
+                devices.push_back(new ggml_backend_device {
+                    /* .iface = */ ggml_backend_kompute_device_i,
+                    /* .reg = */ reg,
+                    /* .context = */ ctx,
+                });
+            }
+            initialized = true;
+        }
+    }
+
+    GGML_ASSERT(device < devices.size());
+    return devices[device];
+}
+
+static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
+    /* .get_name = */ ggml_backend_kompute_reg_get_name,
+    /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
+    /* .get_device = */ ggml_backend_kompute_reg_get_device,
+    /* .get_proc_address = */ NULL,
+};
+
+ggml_backend_reg_t ggml_backend_kompute_reg() {
+    static ggml_backend_reg reg = {
+        /* .iface = */ ggml_backend_kompute_reg_i,
+        /* .context = */ nullptr,
+    };
+
+    return &reg;
+}
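The new device interface also lets the scheduler query op support, buffer-type compatibility, and the batch-size offload heuristic (min_batch_size = 32 above) without Kompute-specific calls. A minimal, hedged sketch of those queries, assuming the generic wrappers ggml_backend_dev_supports_op() and ggml_backend_dev_offload_op() from ggml-backend.h:

#include "ggml-backend.h"
#include "ggml-kompute.h"

// Decide whether a given tensor op can run, and is worth running, on Kompute device `i`
// (sketch only; the real decisions are made inside the ggml scheduler).
static bool kompute_should_take_op(size_t i, const struct ggml_tensor * op) {
    ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i);

    if (!ggml_backend_dev_supports_op(dev, op)) {
        return false; // e.g. unsupported type or op
    }
    // offload_op applies the min_batch_size heuristic added in this commit
    return ggml_backend_dev_offload_op(dev, op);
}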
