#include <omp.h>
#endif

- #ifdef GGML_USE_METAL
- #include <unistd.h>
- #endif
-
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#undef GGML_USE_LLAMAFILE
#endif
@@ -189,6 +185,8 @@ typedef pthread_t ggml_thread_t;
#endif

#if defined(__APPLE__)
+ #include <unistd.h>
+ #include <mach/mach.h>
#include <TargetConditionals.h>
#endif

@@ -386,22 +384,40 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
//#define GGML_SOFT_MAX_ACCELERATE
#endif

+
+ void * ggml_aligned_malloc(size_t size) {
#if defined(_MSC_VER) || defined(__MINGW32__)
- #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
- #define GGML_ALIGNED_FREE(ptr)    _aligned_free(ptr)
+     return _aligned_malloc(size, TENSOR_ALIGNMENT);
#else
- inline static void * ggml_aligned_malloc(size_t size) {
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
        return NULL;
    }
    void * aligned_memory = NULL;
#ifdef GGML_USE_CPU_HBM
-     int result = hbw_posix_memalign(&aligned_memory, 16, size);
+     int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
+ #elif TARGET_OS_OSX
+     kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
+     int result = EFAULT;
+     switch (alloc_status) {
+         case KERN_SUCCESS:
+             result = 0;
+             break;
+         case KERN_INVALID_ADDRESS:
+             result = EINVAL;
+             break;
+         case KERN_NO_SPACE:
+             result = ENOMEM;
+             break;
+         default:
+             result = EFAULT;
+             break;
+     }
#elif GGML_USE_METAL
-     int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
+     const long page_size = sysconf(_SC_PAGESIZE);
+     int result = posix_memalign(&aligned_memory, MAX(TENSOR_ALIGNMENT, page_size), size);
#else
-     int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
+     int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
#endif
    if (result != 0) {
        // Handle allocation failure
@@ -419,14 +435,26 @@ inline static void * ggml_aligned_malloc(size_t size) {
        return NULL;
    }
    return aligned_memory;
+ #endif
}
- #define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
- #ifdef GGML_USE_CPU_HBM
- #define GGML_ALIGNED_FREE(ptr) if(NULL != ptr) hbw_free(ptr)
+
+ void ggml_aligned_free(void * ptr, size_t size) {
+     GGML_UNUSED(size);
+ #if defined(_MSC_VER) || defined(__MINGW32__)
+     _aligned_free(ptr);
+ #elif GGML_USE_CPU_HBM
+     if (ptr != NULL) {
+         hbw_free(ptr);
+     }
+ #elif TARGET_OS_OSX
+     if (ptr != NULL) {
+         vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
+     }
#else
- #define GGML_ALIGNED_FREE(ptr) free(ptr)
- #endif
+     free(ptr);
#endif
+ }
+

inline static void * ggml_malloc(size_t size) {
    if (size == 0) {
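(Aside, not part of the diff: a minimal usage sketch of the new allocator pair. The two declarations below are taken from the hunks above; the caller and the 1024-byte size are hypothetical. ggml_aligned_free now takes the allocation size because vm_deallocate on macOS releases a byte range rather than a pointer, so the caller must know how much was allocated.)

    // Hypothetical caller, for illustration only.
    #include <stddef.h>

    void * ggml_aligned_malloc(size_t size);
    void   ggml_aligned_free(void * ptr, size_t size);

    static void buffer_example(void) {
        const size_t buf_size = 1024;                // illustrative size
        void * buf = ggml_aligned_malloc(buf_size);  // page-aligned via vm_allocate on macOS
        if (buf != NULL) {
            // ... use buf ...
            ggml_aligned_free(buf, buf_size);        // size must match the allocation
        }
    }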
@@ -3869,7 +3897,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

    *ctx = (struct ggml_context) {
        /*.mem_size =*/ mem_size,
-         /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
+         /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
        /*.no_alloc =*/ params.no_alloc,
        /*.no_alloc_save =*/ params.no_alloc,
@@ -3909,7 +3937,7 @@ void ggml_free(struct ggml_context * ctx) {
                __func__, i, ggml_used_mem(ctx));

            if (ctx->mem_buffer_owned) {
-                 GGML_ALIGNED_FREE(ctx->mem_buffer);
+                 ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
            }

            found = true;
@@ -19608,9 +19636,10 @@ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask
void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
    if (!threadpool) return;

+     const int n_threads = threadpool->n_threads_max;
+
#ifndef GGML_USE_OPENMP
    struct ggml_compute_state* workers = threadpool->workers;
-     const int n_threads = threadpool->n_threads_max;

    ggml_mutex_lock(&threadpool->mutex);

@@ -19630,8 +19659,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
    ggml_cond_destroy(&threadpool->cond);
#endif // GGML_USE_OPENMP

-     GGML_ALIGNED_FREE(threadpool->workers);
-     GGML_ALIGNED_FREE(threadpool);
+     const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads;
+     ggml_aligned_free(threadpool->workers, workers_size);
+     ggml_aligned_free(threadpool, sizeof(struct ggml_threadpool));
}

#ifndef GGML_USE_OPENMP
@@ -20063,7 +20093,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
        struct ggml_cplan * cplan) {

    struct ggml_threadpool * threadpool =
-         GGML_ALIGNED_MALLOC(sizeof(struct ggml_threadpool));
+         ggml_aligned_malloc(sizeof(struct ggml_threadpool));
    {
        threadpool->cgraph = cgraph;
        threadpool->cplan = cplan;
@@ -20084,7 +20114,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(

    // Allocate and init workers state
    const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
-     struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size);
+     struct ggml_compute_state * workers = ggml_aligned_malloc(workers_size);

    memset(workers, 0, workers_size);
    for (int j = 0; j < tpp->n_threads; j++) {
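(Aside, not part of the diff: in ggml_threadpool_free, n_threads_max is now read before the #ifndef GGML_USE_OPENMP block because the OpenMP build also needs it to recompute workers_size for ggml_aligned_free. A simplified sketch of the resulting allocate/free pairing follows; the struct layouts are stand-ins, and only the field names and allocator calls follow the diff.)

    #include <stddef.h>

    void * ggml_aligned_malloc(size_t size);
    void   ggml_aligned_free(void * ptr, size_t size);

    struct ggml_compute_state { int dummy; };                                             // stand-in layout
    struct ggml_threadpool { struct ggml_compute_state * workers; int n_threads_max; };   // stand-in layout

    static struct ggml_threadpool * pool_new(int n_threads) {
        struct ggml_threadpool * tp = ggml_aligned_malloc(sizeof(struct ggml_threadpool));
        tp->workers       = ggml_aligned_malloc(sizeof(struct ggml_compute_state) * n_threads);
        tp->n_threads_max = n_threads;
        return tp;
    }

    static void pool_free(struct ggml_threadpool * tp) {
        if (!tp) return;
        // size is recomputed from n_threads_max, mirroring ggml_threadpool_free above
        ggml_aligned_free(tp->workers, sizeof(struct ggml_compute_state) * tp->n_threads_max);
        ggml_aligned_free(tp, sizeof(struct ggml_threadpool));
    }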