@@ -42,6 +42,7 @@
 #include <cstring>
 #include <iostream>
 #include <memory>
+#include <mutex>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
     return results;
 }
 
-// public API returns a C-style array
-ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
-    auto devices = ggml_vk_available_devices_internal(memoryRequired);
-    *count = devices.size();
-    if (devices.empty()) {
-        return nullptr;
-    }
-
-    size_t nbytes = sizeof(ggml_vk_device) * (devices.size());
-    auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
-    memcpy(arr, devices.data(), nbytes);
-    return arr;
+static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
+    static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
+    return devices;
 }
 
 static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
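The replaced C-style accessor made every caller responsible for `free()`ing a `malloc`'d copy of the device list; the new accessor instead caches the enumeration in a function-local static and hands out a reference. A minimal standalone sketch of that pattern (names are hypothetical, not part of the patch):

```cpp
#include <iostream>
#include <string>
#include <vector>

struct device_info { std::string name; };

// stands in for the expensive Vulkan enumeration
static std::vector<device_info> enumerate_devices_expensive() {
    std::cout << "enumerating once\n";  // runs a single time
    return { {"gpu0"}, {"gpu1"} };
}

static std::vector<device_info> & cached_devices() {
    // initializer runs exactly once, thread-safely, on first call
    static std::vector<device_info> devices = enumerate_devices_expensive();
    return devices;
}

int main() {
    std::cout << cached_devices().size() << "\n";  // prints "enumerating once", then 2
    std::cout << cached_devices().size() << "\n";  // cache hit: prints only 2
}
```

Since C++11 the initializer of a function-local static is guaranteed to run exactly once even under concurrent first calls, which is what lets the registry code added later in this patch lean on this accessor without extra locking.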
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
     if (!komputeManager()->hasDevice())
         return ggml_vk_device();
 
-    auto devices = ggml_vk_available_devices_internal(0);
+    auto devices = ggml_vk_available_devices();
     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
     GGML_ASSERT(!devices.empty());
     return devices.front();
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
 }
 
-static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
-    switch (op->type) {
-        case GGML_TYPE_F16:
-        case GGML_TYPE_F32:
-        case GGML_TYPE_Q4_0:
-        case GGML_TYPE_Q4_1:
-            break;
-        default:
-            return false;
-    }
-
+static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
             ;
     }
     return false;
+
+    GGML_UNUSED(dev);
 }
 
 static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
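The `GGML_UNUSED(dev)` statement sits after the final `return` on purpose: it is unreachable at runtime, but it still counts as a use of the parameter, so `-Wunused-parameter` stays quiet without a cast cluttering the function body. A self-contained illustration, assuming `GGML_UNUSED` keeps its usual `(void)` definition from `ggml.h`:

```cpp
#include <cstdio>

// assumed to match the macro's definition in ggml.h
#define GGML_UNUSED(x) (void)(x)

// `dev` is only "used" by the unreachable statement below the return,
// which is enough to suppress the unused-parameter warning
static bool supports_nothing(int dev) {
    return false;

    GGML_UNUSED(dev);
}

int main() {
    printf("%d\n", supports_nothing(0));  // prints 0
    return 0;
}
```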
@@ -1458,10 +1442,12 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
 
         any_commands_recorded = true;
 
+        /* Do we still need this?
         if (!ggml_vk_supports_op(dst)) {
             fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
             GGML_ABORT("unsupported op");
         }
+        */
 
         const int32_t ne00 = src0 ? src0->ne[0] : 0;
         const int32_t ne01 = src0 ? src0->ne[1] : 0;
@@ -1913,25 +1899,30 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
 };
 
 ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
-    static std::vector<ggml_backend_buffer_type> bufts = []() {
-        std::vector<ggml_backend_buffer_type> vec;
-        auto devices = ggml_vk_available_devices_internal(0);
-        vec.reserve(devices.size());
-
-        for (const auto & dev : devices) {
-            vec.push_back({
-                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
-                /* .device  = */ nullptr,
-                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
-            });
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
+
+    static ggml_backend_buffer_type
+        ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];
+
+    static bool ggml_backend_kompute_buffer_type_initialized = false;
+
+    if (!ggml_backend_kompute_buffer_type_initialized) {
+        for (int32_t i = 0; i < devices.size(); i++) {
+            ggml_backend_kompute_buffer_types[i] = {
+                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
+                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
+                /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
+            };
         }
-        return vec;
-    }();
+        ggml_backend_kompute_buffer_type_initialized = true;
+    }
 
-    auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) {
-        return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
-    });
-    return it < bufts.end() ? &*it : nullptr;
+    return &ggml_backend_kompute_buffer_types[device];
 }
 
 // backend
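The rewritten `ggml_backend_kompute_buffer_type()` swaps the lazily built `std::vector` plus linear search for a mutex-guarded, fixed-size static table indexed directly by device id, so the returned pointers stay stable for the program's lifetime. A standalone sketch of the same scheme (all names hypothetical):

```cpp
#include <cassert>
#include <mutex>

constexpr int MAX_DEVICES = 16;          // plays the role of GGML_KOMPUTE_MAX_DEVICES

struct buffer_type { int device = -1; };

static buffer_type * get_buffer_type(int device, int device_count) {
    // function-scope mutex serializes first-time initialization
    static std::mutex mutex;
    std::lock_guard<std::mutex> lock(mutex);

    assert(device < device_count && device_count <= MAX_DEVICES);

    static buffer_type table[MAX_DEVICES];
    static bool initialized = false;
    if (!initialized) {
        for (int i = 0; i < device_count; i++) {
            table[i].device = i;         // one-time setup per device
        }
        initialized = true;
    }
    return &table[device];               // stable pointer into static storage
}

int main() {
    assert(get_buffer_type(0, 2)->device == 0);
    assert(get_buffer_type(1, 2)->device == 1);
}
```

The trade-off is the hard `GGML_KOMPUTE_MAX_DEVICES` cap asserted above; in exchange, callers can hold on to the returned pointer indefinitely without worrying about reallocation.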
@@ -1964,16 +1955,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st
     return GGML_STATUS_SUCCESS;
 }
 
-static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    GGML_UNUSED(backend);
-    return ggml_vk_supports_op(op);
-}
-
-static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    GGML_UNUSED(backend);
-    return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
-}
-
 static struct ggml_backend_i kompute_backend_i = {
     /* .get_name                = */ ggml_backend_kompute_name,
     /* .free                    = */ ggml_backend_kompute_free,
@@ -1987,8 +1968,8 @@ static struct ggml_backend_i kompute_backend_i = {
     /* .graph_plan_update       = */ NULL,
     /* .graph_plan_compute      = */ NULL,
     /* .graph_compute           = */ ggml_backend_kompute_graph_compute,
-    /* .supports_op             = */ ggml_backend_kompute_supports_op,
-    /* .supports_buft           = */ ggml_backend_kompute_supports_buft,
+    /* .supports_op             = */ NULL,
+    /* .supports_buft           = */ NULL,
     /* .offload_op              = */ NULL,
     /* .event_record            = */ NULL,
     /* .event_wait              = */ NULL,
@@ -2006,7 +1987,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
     ggml_backend_t kompute_backend = new ggml_backend {
         /* .guid      = */ ggml_backend_kompute_guid(),
         /* .interface = */ kompute_backend_i,
-        /* .device    = */ nullptr,
+        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
         /* .context   = */ s_kompute_context,
     };
 
@@ -2016,3 +1997,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
 bool ggml_backend_is_kompute(ggml_backend_t backend) {
     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
 }
+
+static size_t ggml_backend_kompute_get_device_count() {
+    auto devices = ggml_vk_available_devices();
+    return devices.size();
+}
+
+static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    snprintf(description, description_size, "%s", devices[device].name);
+}
+
+static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    *total = devices[device].heapSize;
+    *free  = devices[device].heapSize;
+}
+
+//////////////////////////
+
+struct ggml_backend_kompute_device_context {
+    int device;
+    std::string name;
+    std::string description;
+};
+
+static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->name.c_str();
+}
+
+static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->description.c_str();
+}
+
+static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_get_device_memory(ctx->device, free, total);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_buffer_type(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) {
+        return false;
+    }
+
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context;
+
+    return buft_ctx->device == ctx->device;
+}
+
+static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
+}
+
+static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name        = ggml_backend_kompute_device_get_name(dev);
+    props->description = ggml_backend_kompute_device_get_description(dev);
+    props->type        = ggml_backend_kompute_device_get_type(dev);
+    ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* .async                = */ false,
+        /* .host_buffer          = */ false,
+        /* .buffer_from_host_ptr = */ false,
+        /* .events               = */ false,
+    };
+}
+
+static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
+    GGML_UNUSED(params);
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_init(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
+    const int min_batch_size = 32;
+
+    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
+           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
+
+    GGML_UNUSED(dev);
+}
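`ggml_backend_kompute_device_offload_op()` encodes the batch-size heuristic used elsewhere in ggml: offloading is only worthwhile once the batch dimension (`ne[1]`, or `ne[2]` for `GGML_OP_MUL_MAT_ID`) reaches 32, and `GGML_OP_GET_ROWS` is never offloaded. A standalone restatement of that predicate with hypothetical stand-in types:

```cpp
#include <cassert>
#include <cstdint>

enum fake_op { OP_MUL_MAT, OP_GET_ROWS, OP_MUL_MAT_ID };

static bool should_offload(fake_op op, int64_t ne1, int64_t ne2) {
    const int min_batch_size = 32;
    return (ne1 >= min_batch_size && op != OP_GET_ROWS) ||
           (ne2 >= min_batch_size && op == OP_MUL_MAT_ID);
}

int main() {
    assert( should_offload(OP_MUL_MAT,    64, 1));  // large batch: offload
    assert(!should_offload(OP_MUL_MAT,     1, 1));  // single token: keep local
    assert(!should_offload(OP_GET_ROWS,   64, 1));  // never offloaded
    assert( should_offload(OP_MUL_MAT_ID,  1, 64)); // expert batch lives in ne[2]
}
```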
+
+static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
+    /* .get_name             = */ ggml_backend_kompute_device_get_name,
+    /* .get_description      = */ ggml_backend_kompute_device_get_description,
+    /* .get_memory           = */ ggml_backend_kompute_device_get_memory,
+    /* .get_type             = */ ggml_backend_kompute_device_get_type,
+    /* .get_props            = */ ggml_backend_kompute_device_get_props,
+    /* .init_backend         = */ ggml_backend_kompute_device_init,
+    /* .get_buffer_type      = */ ggml_backend_kompute_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ NULL,
+    /* .supports_op          = */ ggml_backend_kompute_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_kompute_device_supports_buft,
+    /* .offload_op           = */ ggml_backend_kompute_device_offload_op,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+
+static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return "Kompute";
+}
+
+static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return ggml_backend_kompute_get_device_count();
+}
+
+static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+    static std::vector<ggml_backend_dev_t> devices;
+
+    static bool initialized = false;
+
+    {
+        static std::mutex mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+        if (!initialized) {
+            for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) {
+                ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
+                char desc[256];
+                ggml_backend_kompute_get_device_description(i, desc, sizeof(desc));
+                ctx->device      = i;
+                ctx->name        = "Kompute" + std::to_string(i);
+                ctx->description = desc;
+                devices.push_back(new ggml_backend_device {
+                    /* .iface   = */ ggml_backend_kompute_device_i,
+                    /* .reg     = */ reg,
+                    /* .context = */ ctx,
+                });
+            }
+            initialized = true;
+        }
+    }
+
+    GGML_ASSERT(device < devices.size());
+    return devices[device];
+}
+
+static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
+    /* .get_name         = */ ggml_backend_kompute_reg_get_name,
+    /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_kompute_reg_get_device,
+    /* .get_proc_address = */ NULL,
+};
+
+ggml_backend_reg_t ggml_backend_kompute_reg() {
+    static ggml_backend_reg reg = {
+        /* .iface   = */ ggml_backend_kompute_reg_i,
+        /* .context = */ nullptr,
+    };
+
+    return &reg;
+}
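With the registry in place, a frontend can discover Kompute devices through the generic device API instead of backend-specific enumeration calls. A hypothetical caller, assuming the `ggml_backend_reg_dev_count()` / `ggml_backend_reg_dev_get()` / `ggml_backend_dev_name()` / `ggml_backend_dev_description()` accessors declared in `ggml-backend.h`:

```cpp
#include <cstdio>

#include "ggml-backend.h"
#include "ggml-kompute.h"

int main() {
    // entry point added by this patch
    ggml_backend_reg_t reg = ggml_backend_kompute_reg();

    // walk every Kompute device the registry exposes
    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        printf("device %zu: %s (%s)\n", i,
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
    }
    return 0;
}
```

The loop itself is illustrative only; the important point is that `ggml_backend_kompute_reg()` returns a process-lifetime singleton, so repeated calls are cheap and the `ggml_backend_dev_t` handles it hands out remain valid.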