Skip to content

Commit 91b4c08

Browse files
committed
Add memkind as the HBM (high-bandwidth memory) allocator
1 parent f31b539 commit 91b4c08

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -502,6 +502,11 @@ endif()
502502
# Build libraries
503503
#
504504

505+
if (GGML_USE_HBM)
506+
add_definitions(-DGGML_USE_HBM)
507+
find_library(memkind memkind REQUIRED)
508+
endif()
509+
505510
add_library(ggml OBJECT
506511
ggml.c
507512
ggml.h
@@ -517,6 +522,9 @@ add_library(ggml OBJECT
517522
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
518523
target_compile_features(ggml PUBLIC c_std_11) # don't bump
519524
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
525+
if (GGML_USE_HBM)
526+
target_link_libraries(ggml PUBLIC memkind)
527+
endif()
520528

521529
add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
522530
if (BUILD_SHARED_LIBS)

ggml.c

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,9 @@ typedef void * thread_ret_t;
103103
#include <sys/stat.h>
104104
#include <unistd.h>
105105

106+
#endif
107+
#ifdef GGML_USE_HBM
108+
#include <hbwmalloc.h>
106109
#endif
107110

108111
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
@@ -197,7 +200,9 @@ typedef void * thread_ret_t;
197200
#else
198201
inline static void * ggml_aligned_malloc(size_t size) {
199202
void * aligned_memory = NULL;
200-
#ifdef GGML_USE_METAL
203+
#ifdef GGML_USE_HBM
204+
int result = hbw_posix_memalign(&aligned_memory, 16, size);
205+
#elif GGML_USE_METAL
201206
int result = posix_memalign(&aligned_memory, getpagesize(), size);
202207
#else
203208
int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
@@ -220,8 +225,12 @@ inline static void * ggml_aligned_malloc(size_t size) {
220225
return aligned_memory;
221226
}
222227
#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
228+
#ifdef GGML_USE_HBM
229+
#define GGML_ALIGNED_FREE(ptr) hbw_free(ptr)
230+
#else
223231
#define GGML_ALIGNED_FREE(ptr) free(ptr)
224232
#endif
233+
#endif
225234

226235
#define UNUSED GGML_UNUSED
227236
#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)

llama.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,9 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
7171
#define LLAMA_MAX_SCRATCH_BUFFERS 16
7272
#endif
7373

74+
#ifdef GGML_USE_HBM
75+
#include <hbwmalloc.h>
76+
#endif
7477

7578
// available llama models
7679
enum e_model {
@@ -789,7 +792,11 @@ struct llama_model_loader {
789792
// allocate temp buffer if not using mmap
790793
if (!use_mmap && lt.data == NULL) {
791794
GGML_ASSERT(lt.ggml_tensor->backend != GGML_BACKEND_CPU);
795+
#ifdef GGML_USE_HBM
796+
lt.data = (uint8_t*)hbw_malloc(ggml_nbytes(lt.ggml_tensor));
797+
#else
792798
lt.data = (uint8_t*)malloc(ggml_nbytes(lt.ggml_tensor));
799+
#endif
793800
}
794801

795802
load_data_for(lt);

0 commit comments

Comments
 (0)