 #include <cstring>
 #include <iostream>
 #include <memory>
+#include <mutex>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
     return results;
 }
 
-// public API returns a C-style array
-ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
-    auto devices = ggml_vk_available_devices_internal(memoryRequired);
-    *count = devices.size();
-    if (devices.empty()) {
-        return nullptr;
-    }
-
-    size_t nbytes = sizeof(ggml_vk_device) * (devices.size());
-    auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
-    memcpy(arr, devices.data(), nbytes);
-    return arr;
+static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
+    static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
+    return devices;
 }
 
 static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
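
A note on the pattern above: the caller-freed, malloc'd C array is replaced by a function-local static vector, so C++11's once-only initialization caches the Vulkan enumeration and every later call returns the same list. Callers that mutate the result must copy it first; a minimal sketch, not part of the patch (the helper name and vendor string are made up):

```cpp
// Hypothetical helper: filter a *copy* of the cached device list.
// ggml_vk_available_devices() returns a reference to a static vector,
// so pruning must happen on a local copy to leave the cache intact.
static ggml_vk_device pick_first_device_by_vendor(const std::string & vendor) {
    auto devices = ggml_vk_available_devices(); // copy of the cached vector
    ggml_vk_filterByVendor(devices, vendor);    // prunes only the local copy
    GGML_ASSERT(!devices.empty());
    return devices.front();
}
```
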
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
     if (!komputeManager()->hasDevice())
         return ggml_vk_device();
 
-    auto devices = ggml_vk_available_devices_internal(0);
+    auto devices = ggml_vk_available_devices();
     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
     GGML_ASSERT(!devices.empty());
     return devices.front();
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
 }
 
-static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
-    switch (op->type) {
-        case GGML_TYPE_F16:
-        case GGML_TYPE_F32:
-        case GGML_TYPE_Q4_0:
-        case GGML_TYPE_Q4_1:
-            break;
-        default:
-            return false;
-    }
-
+static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
             ;
     }
     return false;
+
+    GGML_UNUSED(dev);
 }
 
 static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
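
With the old whole-tensor type switch dropped, op support is now reported through the generic backend-device interface rather than a Kompute-only helper. A hedged sketch of how a caller could probe it (assumes the registry added at the end of this patch and a tensor `node` taken from some graph):

```cpp
// Sketch only: query op support via the generic device interface.
ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), 0);
if (!ggml_backend_dev_supports_op(dev, node)) {
    // the scheduler would place this node on a fallback backend
}
```
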
@@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
 
             any_commands_recorded = true;
 
-            if (!ggml_vk_supports_op(dst)) {
-                fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
-                GGML_ABORT("unsupported op");
-            }
-
             const int32_t ne00 = src0 ? src0->ne[0] : 0;
             const int32_t ne01 = src0 ? src0->ne[1] : 0;
             const int32_t ne02 = src0 ? src0->ne[2] : 0;
@@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
 };
 
 ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
-    static std::vector<ggml_backend_buffer_type> bufts = []() {
-        std::vector<ggml_backend_buffer_type> vec;
-        auto devices = ggml_vk_available_devices_internal(0);
-        vec.reserve(devices.size());
-
-        for (const auto & dev : devices) {
-            vec.push_back({
-                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
-                /* .device  = */ nullptr,
-                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
-            });
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
+    auto devices = ggml_vk_available_devices();
+    int32_t device_count = (int32_t) devices.size();
+    GGML_ASSERT(device < device_count);
+    GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
+
+    static ggml_backend_buffer_type
+        ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];
+
+    static bool ggml_backend_kompute_buffer_type_initialized = false;
+
+    if (!ggml_backend_kompute_buffer_type_initialized) {
+        for (int32_t i = 0; i < device_count; i++) {
+            ggml_backend_kompute_buffer_types[i] = {
+                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
+                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
+                /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
+            };
         }
-        return vec;
-    }();
+        ggml_backend_kompute_buffer_type_initialized = true;
+    }
 
-    auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) {
-        return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
-    });
-    return it < bufts.end() ? &*it : nullptr;
+    return &ggml_backend_kompute_buffer_types[device];
 }
 
 // backend
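
The lambda-built vector and linear search become a fixed table indexed directly by device id, initialized once under a mutex, with each entry now pointing at its registry device instead of `nullptr`. Illustrative use through the generic buffer-type API, not taken from the patch (the 16 MiB size is arbitrary):

```cpp
// Sketch: allocate and release a buffer from device 0's buffer type.
ggml_backend_buffer_type_t buft = ggml_backend_kompute_buffer_type(0);
ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 16u * 1024 * 1024);
GGML_ASSERT(buf != nullptr);
ggml_backend_buffer_free(buf);
```
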
@@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st
     return GGML_STATUS_SUCCESS;
 }
 
-static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    GGML_UNUSED(backend);
-    return ggml_vk_supports_op(op);
-}
-
-static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    GGML_UNUSED(backend);
-    return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
-}
-
 static struct ggml_backend_i kompute_backend_i = {
     /* .get_name    = */ ggml_backend_kompute_name,
     /* .free        = */ ggml_backend_kompute_free,
@@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
     ggml_backend_t kompute_backend = new ggml_backend {
         /* .guid      = */ ggml_backend_kompute_guid(),
         /* .interface = */ kompute_backend_i,
-        /* .device    = */ nullptr,
+        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
         /* .context   = */ s_kompute_context,
     };
 
@@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
 bool ggml_backend_is_kompute(ggml_backend_t backend) {
     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
 }
+
+static size_t ggml_backend_kompute_get_device_count() {
+    auto devices = ggml_vk_available_devices();
+    return devices.size();
+}
+
+static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    snprintf(description, description_size, "%s", devices[device].name);
+}
+
+static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    *total = devices[device].heapSize;
+    *free = devices[device].heapSize;
+}
+
+//////////////////////////
+
+struct ggml_backend_kompute_device_context {
+    int device;
+    std::string name;
+    std::string description;
+};
+
+static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->name.c_str();
+}
+
+static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->description.c_str();
+}
+
+static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_get_device_memory(ctx->device, free, total);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_buffer_type(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) {
+        return false;
+    }
+
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context;
+
+    return buft_ctx->device == ctx->device;
+}
+
+static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return GGML_BACKEND_DEVICE_TYPE_GPU;
+}
+
+static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name        = ggml_backend_kompute_device_get_name(dev);
+    props->description = ggml_backend_kompute_device_get_description(dev);
+    props->type        = ggml_backend_kompute_device_get_type(dev);
+    ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* async                 = */ false,
+        /* host_buffer           = */ false,
+        /* .buffer_from_host_ptr = */ false,
+        /* events                = */ false,
+    };
+}
+
+static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
+    GGML_UNUSED(params);
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_init(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
+    const int min_batch_size = 32;
+
+    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
+           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
+
+    GGML_UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
+    /* .get_name             = */ ggml_backend_kompute_device_get_name,
+    /* .get_description      = */ ggml_backend_kompute_device_get_description,
+    /* .get_memory           = */ ggml_backend_kompute_device_get_memory,
+    /* .get_type             = */ ggml_backend_kompute_device_get_type,
+    /* .get_props            = */ ggml_backend_kompute_device_get_props,
+    /* .init_backend         = */ ggml_backend_kompute_device_init,
+    /* .get_buffer_type      = */ ggml_backend_kompute_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ NULL,
+    /* .supports_op          = */ ggml_backend_kompute_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_kompute_device_supports_buft,
+    /* .offload_op           = */ ggml_backend_kompute_device_offload_op,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+
+static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return "Kompute";
+}
+
+static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return ggml_backend_kompute_get_device_count();
+}
+
+static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+    static std::vector<ggml_backend_dev_t> devices;
+
+    static bool initialized = false;
+
+    {
+        static std::mutex mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+        if (!initialized) {
+            for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) {
+                ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
+                char desc[256];
+                ggml_backend_kompute_get_device_description(i, desc, sizeof(desc));
+                ctx->device = i;
+                ctx->name = "Kompute" + std::to_string(i);
+                ctx->description = desc;
+                devices.push_back(new ggml_backend_device {
+                    /* .iface   = */ ggml_backend_kompute_device_i,
+                    /* .reg     = */ reg,
+                    /* .context = */ ctx,
+                });
+            }
+            initialized = true;
+        }
+    }
+
+    GGML_ASSERT(device < devices.size());
+    return devices[device];
+}
+
+static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
+    /* .get_name         = */ ggml_backend_kompute_reg_get_name,
+    /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_kompute_reg_get_device,
+    /* .get_proc_address = */ NULL,
+};
+
+ggml_backend_reg_t ggml_backend_kompute_reg() {
+    static ggml_backend_reg reg = {
+        /* .iface   = */ ggml_backend_kompute_reg_i,
+        /* .context = */ nullptr,
+    };
+
+    return &reg;
+}
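
Taken together, the patch gives the Kompute backend the standard registry/device plumbing. A minimal, self-contained sketch of how a client could drive it, assuming at least one Vulkan device and the usual public headers (this program is illustrative, not part of the patch):

```cpp
#include "ggml-backend.h"
#include "ggml-kompute.h"

#include <cstdio>

int main() {
    // Enumerate devices through the registry added by this patch.
    ggml_backend_reg_t reg = ggml_backend_kompute_reg();
    size_t n_dev = ggml_backend_reg_dev_count(reg);
    for (size_t i = 0; i < n_dev; i++) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        size_t mem_free, mem_total;
        ggml_backend_dev_memory(dev, &mem_free, &mem_total);
        printf("%zu: %s (%s), %zu MiB\n", i,
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
               mem_total / 1024 / 1024);
    }
    if (n_dev == 0) {
        return 1;
    }
    // init_backend on device 0 routes through ggml_backend_kompute_init().
    ggml_backend_t backend = ggml_backend_dev_init(ggml_backend_reg_dev_get(reg, 0), nullptr);
    // ... allocate buffers and compute graphs with the backend ...
    ggml_backend_free(backend);
    return 0;
}
```
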