Skip to content

Commit 8da28db

Browse files
committed
First tests for host memory pool
1 parent a29f087 commit 8da28db

File tree

4 files changed

+125
-0
lines changed

4 files changed

+125
-0
lines changed

ggml/src/ggml-sycl/common.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void* ggml_sycl_host_malloc(size_t size) try {
2929
dpct::err0 err = CHECK_TRY_ERROR(
3030
ptr = (void*)sycl::malloc_host(size, dpct::get_in_order_queue()));
3131

32+
printf("Luigi\n");
3233
if (err != 0) {
3334
// clear the error
3435
GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0, "syclGetErrorString is not supported");

ggml/src/ggml-sycl/common.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,12 @@ struct ggml_backend_sycl_context {
333333
// pool
334334
std::unique_ptr<ggml_sycl_pool> pools[GGML_SYCL_MAX_DEVICES];
335335

336+
std::unique_ptr<ggml_sycl_pool> host_pools[GGML_SYCL_MAX_DEVICES];
337+
336338
static std::unique_ptr<ggml_sycl_pool> new_pool_for_device(queue_ptr qptr, int device);
337339

340+
static std::unique_ptr<ggml_sycl_pool> new_pool_for_host(queue_ptr qptr, int device);
341+
338342
ggml_sycl_pool & pool(int device) {
339343
if (pools[device] == nullptr) {
340344
pools[device] = new_pool_for_device(stream(device,0), device);
@@ -345,6 +349,17 @@ struct ggml_backend_sycl_context {
345349
ggml_sycl_pool & pool() {
346350
return pool(device);
347351
}
352+
353+
ggml_sycl_pool & host_pool(int device) {
354+
if (host_pools[device] == nullptr) {
355+
host_pools[device] = new_pool_for_host(stream(device,0), device);
356+
}
357+
return *host_pools[device];
358+
}
359+
360+
ggml_sycl_pool & host_pool() {
361+
return host_pool(device);
362+
}
348363
};
349364

350365
// common device functions

ggml/src/ggml-sycl/dpct/helper.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,14 @@
1919
#include <oneapi/mkl.hpp>
2020
#include <map>
2121

22+
#include "ggml-sycl.h"
2223
#include "ggml.h"
2324

25+
#include "ggml-backend.h"
26+
#include "ggml-backend-impl.h"
27+
#include "ggml-alloc.h"
28+
#include "ggml-impl.h"
29+
2430
#if defined(__linux__)
2531
#include <sys/mman.h>
2632
#elif defined(_WIN64)
@@ -1745,6 +1751,11 @@ namespace dpct
17451751
Ts alpha_value = dpct::get_value(reinterpret_cast<const Ts *>(alpha), q);
17461752
Ts beta_value = dpct::get_value(reinterpret_cast<const Ts *>(beta), q);
17471753

1754+
//ggml_backend_sycl_host_buffer_type()->alloc_buffer;
1755+
auto tmp = ggml_backend_sycl_reg();
1756+
std::cout << "this is WARIO " << tmp->iface.get_name(tmp) << '\n';
1757+
1758+
17481759
matrix_info_t *matrix_info =
17491760
(matrix_info_t *)std::malloc(sizeof(matrix_info_t));
17501761
matrix_info->transpose_info[0] = a_trans;

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,7 @@ static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buff
10381038

10391039
static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
10401040
void * ptr = ggml_sycl_host_malloc(size);
1041+
printf("Mario\n");
10411042

10421043
if (ptr == nullptr) {
10431044
// fallback to cpu buffer
@@ -1173,6 +1174,103 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
11731174
}
11741175
};
11751176

1177+
struct ggml_sycl_pool_host : public ggml_sycl_pool {
1178+
1179+
int device;
1180+
queue_ptr qptr;
1181+
struct ggml_sycl_buffer {
1182+
void * ptr = nullptr;
1183+
size_t size = 0;
1184+
};
1185+
1186+
// Set arbitrarly to 16
1187+
static constexpr uint MAX_POOL_SIZE{16};
1188+
ggml_sycl_buffer buffer_pool[MAX_POOL_SIZE] = {};
1189+
size_t pool_size = 0;
1190+
1191+
explicit ggml_sycl_pool_host(queue_ptr qptr_, int device_) :
1192+
qptr(qptr_),
1193+
device(device_) {
1194+
}
1195+
1196+
~ggml_sycl_pool_host() {
1197+
for (int i = 0; i < MAX_POOL_SIZE; ++i) {
1198+
ggml_sycl_buffer & b = buffer_pool[i];
1199+
if (b.ptr != nullptr) {
1200+
SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(b.ptr, *qptr)));
1201+
pool_size -= b.size;
1202+
}
1203+
}
1204+
GGML_ASSERT(pool_size == 0);
1205+
}
1206+
1207+
void * alloc(size_t size, size_t * actual_size) override {
1208+
size_t best_diff = 1ull << 36;
1209+
int ibest = -1;
1210+
for (int i = 0; i < MAX_POOL_SIZE; ++i) {
1211+
ggml_sycl_buffer& b = buffer_pool[i];
1212+
if (b.ptr != nullptr) {
1213+
if (b.size >= size) {
1214+
size_t diff = b.size - size;
1215+
if (diff < best_diff) {
1216+
best_diff = diff;
1217+
ibest = i;
1218+
if (!best_diff) {
1219+
void * ptr = b.ptr;
1220+
*actual_size = b.size;
1221+
b.ptr = nullptr;
1222+
b.size = 0;
1223+
return ptr;
1224+
}
1225+
}
1226+
}
1227+
}
1228+
}
1229+
if (ibest >= 0) {
1230+
ggml_sycl_buffer& b = buffer_pool[ibest];
1231+
void * ptr = b.ptr;
1232+
*actual_size = b.size;
1233+
b.ptr = nullptr;
1234+
b.size = 0;
1235+
return ptr;
1236+
}
1237+
void * ptr;
1238+
size_t look_ahead_size = (size_t) (1.05 * size);
1239+
1240+
SYCL_CHECK(
1241+
CHECK_TRY_ERROR(ptr = (void *)sycl::malloc_host(
1242+
look_ahead_size, *qptr)));
1243+
if (!ptr) {
1244+
GGML_LOG_ERROR("%s: can't allocate %lu Bytes of memory on host\n", __func__, look_ahead_size);
1245+
return nullptr;
1246+
}
1247+
1248+
*actual_size = look_ahead_size;
1249+
pool_size += look_ahead_size;
1250+
1251+
return ptr;
1252+
}
1253+
1254+
void free(void * ptr, size_t size) override {
1255+
for (int i = 0; i < MAX_POOL_SIZE; ++i) {
1256+
ggml_sycl_buffer& b = buffer_pool[i];
1257+
if (b.ptr == nullptr) {
1258+
b.ptr = ptr;
1259+
b.size = size;
1260+
return;
1261+
}
1262+
}
1263+
GGML_LOG_WARN("WARNING: sycl buffer pool full, increase MAX_POOL_SIZE\n");
1264+
SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, *qptr)));
1265+
pool_size -= size;
1266+
}
1267+
};
1268+
1269+
std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_host(queue_ptr qptr, int device) {
1270+
// return pool for the host to speed up memory management
1271+
return std::unique_ptr<ggml_sycl_pool>(new ggml_sycl_pool_host(qptr, device));
1272+
}
1273+
11761274
std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_device(queue_ptr qptr, int device) {
11771275
// TBD: NO VMM support
11781276
// if (ggml_sycl_info().devices[device].vmm) {

0 commit comments

Comments
 (0)