Skip to content

optimize Disjoint Pool chunks #1147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/base_alloc/base_alloc_global.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ static void umf_ba_create_global(void) {
}

size_t smallestSize = BASE_ALLOC.ac_sizes[0];
BASE_ALLOC.smallest_ac_size_log2 = log2Utils(smallestSize);
BASE_ALLOC.smallest_ac_size_log2 = utils_msb64(smallestSize);

LOG_DEBUG("UMF base allocator created");
}
Expand All @@ -83,8 +83,8 @@ static int size_to_idx(size_t size) {
}

int isPowerOf2 = (0 == (size & (size - 1)));
int index =
(int)(log2Utils(size) + !isPowerOf2 - BASE_ALLOC.smallest_ac_size_log2);
int index = (int)(utils_msb64(size) + !isPowerOf2 -
BASE_ALLOC.smallest_ac_size_log2);

assert(index >= 0);
return index;
Expand Down
3 changes: 2 additions & 1 deletion src/critnib/critnib.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
#include "utils_assert.h"
#include "utils_common.h"
#include "utils_concurrency.h"
#include "utils_math.h"

/*
* A node that has been deleted is left untouched for this many delete
Expand Down Expand Up @@ -367,7 +368,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) {
}

/* and convert that to an index. */
sh_t sh = utils_mssb_index(at) & (sh_t) ~(SLICE - 1);
sh_t sh = utils_msb64(at) & (sh_t) ~(SLICE - 1);

struct critnib_node *m = alloc_node(c);
if (!m) {
Expand Down
75 changes: 32 additions & 43 deletions src/pool/pool_disjoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,28 +75,36 @@ static slab_t *create_slab(bucket_t *bucket) {
umf_result_t res = UMF_RESULT_SUCCESS;
umf_memory_provider_handle_t provider = bucket->pool->provider;

slab_t *slab = umf_ba_global_alloc(sizeof(*slab));
size_t num_chunks_total =
utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);

// Calculate the number of 64-bit words needed.
size_t num_words =
(num_chunks_total + CHUNK_BITMAP_SIZE - 1) / CHUNK_BITMAP_SIZE;

slab_t *slab = umf_ba_global_alloc(sizeof(*slab) +
num_words * sizeof(slab->chunks[0]));
if (slab == NULL) {
LOG_ERR("allocation of new slab failed!");
return NULL;
}

slab->num_chunks_allocated = 0;
slab->first_free_chunk_idx = 0;
slab->bucket = bucket;

slab->iter.val = slab;
slab->iter.prev = slab->iter.next = NULL;

slab->num_chunks_total =
utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);
slab->chunks =
umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total);
if (slab->chunks == NULL) {
LOG_ERR("allocation of slab chunks failed!");
goto free_slab;
slab->num_chunks_total = num_chunks_total;
slab->num_words = num_words;

// set all chunks as free
memset(slab->chunks, ~0, num_words * sizeof(slab->chunks[0]));
if (num_chunks_total % CHUNK_BITMAP_SIZE) {
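// the last word is only partially used: keep the low bits that map to
// existing chunks set (free) and clear the unused high bits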
slab->chunks[num_words - 1] =
((1ULL << (num_chunks_total % CHUNK_BITMAP_SIZE)) - 1);
}
memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total);

// if slab_min_size is not a multiple of bucket size, we would have some
// padding at the end of the slab
Expand All @@ -108,7 +116,7 @@ static slab_t *create_slab(bucket_t *bucket) {
res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr);
if (res != UMF_RESULT_SUCCESS) {
LOG_ERR("allocation of slab data failed!");
goto free_slab_chunks;
goto free_slab;
}

// raw allocation is not available for user so mark it as inaccessible
Expand All @@ -117,9 +125,6 @@ static slab_t *create_slab(bucket_t *bucket) {
LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size);
return slab;

free_slab_chunks:
umf_ba_global_free(slab->chunks);

free_slab:
umf_ba_global_free(slab);
return NULL;
Expand All @@ -136,25 +141,21 @@ static void destroy_slab(slab_t *slab) {
LOG_ERR("deallocation of slab data failed!");
}

umf_ba_global_free(slab->chunks);
umf_ba_global_free(slab);
}

// return the index of the first available chunk, SIZE_MAX otherwise
static size_t slab_find_first_available_chunk_idx(const slab_t *slab) {
// use the first free chunk index as a hint for the search
for (bool *chunk = slab->chunks + slab->first_free_chunk_idx;
chunk != slab->chunks + slab->num_chunks_total; chunk++) {

// false means not used
if (*chunk == false) {
size_t idx = chunk - slab->chunks;
LOG_DEBUG("idx: %zu", idx);
return idx;
for (size_t i = 0; i < slab->num_words; i++) {
// NOTE: free chunks are represented as set bits
uint64_t word = slab->chunks[i];
if (word != 0) {
size_t bit_index = utils_lsb64(word);
size_t free_chunk = i * CHUNK_BITMAP_SIZE + bit_index;
return free_chunk;
}
}

LOG_DEBUG("idx: SIZE_MAX");
// No free chunk was found.
return SIZE_MAX;
}

Expand All @@ -167,12 +168,9 @@ static void *slab_get_chunk(slab_t *slab) {
(void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size);

// mark chunk as used
slab->chunks[chunk_idx] = true;
slab_set_chunk_bit(slab, chunk_idx, false);
slab->num_chunks_allocated += 1;

// use the found index as the next hint
slab->first_free_chunk_idx = chunk_idx + 1;

return free_chunk;
}

Expand All @@ -195,18 +193,9 @@ static void slab_free_chunk(slab_t *slab, void *ptr) {
size_t chunk_idx = ptr_diff / slab->bucket->size;

// Make sure that the chunk was allocated
assert(slab->chunks[chunk_idx] && "double free detected");
slab->chunks[chunk_idx] = false;
assert(slab_read_chunk_bit(slab, chunk_idx) == 0 && "double free detected");
slab_set_chunk_bit(slab, chunk_idx, true);
slab->num_chunks_allocated -= 1;

if (chunk_idx < slab->first_free_chunk_idx) {
slab->first_free_chunk_idx = chunk_idx;
}

LOG_DEBUG("chunk_idx: %zu, num_chunks_allocated: %zu, "
"first_free_chunk_idx: %zu",
chunk_idx, slab->num_chunks_allocated,
slab->first_free_chunk_idx);
}

static bool slab_has_avail(const slab_t *slab) {
Expand Down Expand Up @@ -466,7 +455,7 @@ static size_t size_to_idx(disjoint_pool_t *pool, size_t size) {
}

// get the position of the leftmost set bit
size_t position = getLeftmostSetBitPos(size);
size_t position = utils_msb64(size);

bool is_power_of_2 = 0 == (size & (size - 1));
bool larger_than_halfway_between_powers_of_2 =
Expand Down Expand Up @@ -622,7 +611,7 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider,
Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE);

// Calculate the exponent for min_bucket_size used for finding buckets.
disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1);
disjoint_pool->min_bucket_size_exp = (size_t)utils_msb64(Size1);
disjoint_pool->default_shared_limits =
umfDisjointPoolSharedLimitsCreate(SIZE_MAX);

Expand Down
35 changes: 29 additions & 6 deletions src/pool/pool_disjoint_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "critnib/critnib.h"
#include "utils_concurrency.h"

#define CHUNK_BITMAP_SIZE 64

typedef struct bucket_t bucket_t;
typedef struct slab_t slab_t;
typedef struct slab_list_item_t slab_list_item_t;
Expand Down Expand Up @@ -81,23 +83,24 @@ typedef struct slab_t {
void *mem_ptr;
size_t slab_size;

// Represents the current state of each chunk: if the bit is set, the
// chunk is allocated; otherwise, the chunk is free for allocation
bool *chunks;
size_t num_chunks_total;

// Number of 64-bit words needed to store the state of all chunks
size_t num_words;

// Total number of allocated chunks at the moment.
size_t num_chunks_allocated;

// The bucket which the slab belongs to
bucket_t *bucket;

// Hints where to start search for free chunk in a slab
size_t first_free_chunk_idx;

// Store iterator to the corresponding node in avail/unavail list
// to achieve O(1) removal
slab_list_item_t iter;

// Represents the current state of each chunk: if the bit is clear, the
// chunk is allocated; otherwise, the chunk is free for allocation
uint64_t chunks[];
} slab_t;

typedef struct umf_disjoint_pool_shared_limits_t {
Expand Down Expand Up @@ -158,4 +161,24 @@ typedef struct disjoint_pool_t {
size_t provider_min_page_size;
} disjoint_pool_t;

// Writes the state bit of chunk `index` in the slab's chunk bitmap.
// `value == true` sets the bit, `value == false` clears it.
// `index` must be smaller than slab->num_chunks_total (checked by assert).
static inline void slab_set_chunk_bit(slab_t *slab, size_t index, bool value) {
    assert(index < slab->num_chunks_total && "Index out of range");

    // locate the 64-bit word holding the bit and build a single-bit mask
    uint64_t *word = &slab->chunks[index / CHUNK_BITMAP_SIZE];
    const uint64_t mask = 1ULL << (index % CHUNK_BITMAP_SIZE);

    *word = value ? (*word | mask) : (*word & ~mask);
}

// Reads the state bit of chunk `index` from the slab's chunk bitmap.
// Returns 1 when the bit is set and 0 when it is clear.
// `index` must be smaller than slab->num_chunks_total (checked by assert).
static inline int slab_read_chunk_bit(const slab_t *slab, size_t index) {
    assert(index < slab->num_chunks_total && "Index out of range");

    const uint64_t word = slab->chunks[index / CHUNK_BITMAP_SIZE];
    const unsigned shift = index % CHUNK_BITMAP_SIZE;

    // move the requested bit to position 0 and drop everything else
    return (word >> shift) & 1;
}

#endif // UMF_POOL_DISJOINT_INTERNAL_H
7 changes: 3 additions & 4 deletions src/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2023-2024 Intel Corporation
# Copyright (C) 2023-2025 Intel Corporation
# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

Expand All @@ -7,15 +7,14 @@ include(FindThreads)

set(UMF_UTILS_SOURCES_COMMON utils_common.c utils_log.c utils_load_library.c)

set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c
utils_posix_math.c)
set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c)

set(UMF_UTILS_SOURCES_LINUX utils_linux_common.c)

set(UMF_UTILS_SOURCES_MACOSX utils_macosx_common.c)

set(UMF_UTILS_SOURCES_WINDOWS utils_windows_common.c
utils_windows_concurrency.c utils_windows_math.c)
utils_windows_concurrency.c)

if(UMF_USE_VALGRIND)
if(UMF_USE_ASAN
Expand Down
15 changes: 0 additions & 15 deletions src/utils/utils_concurrency.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,6 @@ void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void));

#if defined(_WIN32)

static inline unsigned char utils_lssb_index(long long value) {
unsigned long ret;
_BitScanForward64(&ret, value);
return (unsigned char)ret;
}

static inline unsigned char utils_mssb_index(long long value) {
unsigned long ret;
_BitScanReverse64(&ret, value);
return (unsigned char)ret;
}

// There is no good way to do atomic_load on windows...
static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) {
// NOTE: Windows cl complains about direct accessing 'ptr' which is next
Expand Down Expand Up @@ -166,9 +154,6 @@ static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected,

#else // !defined(_WIN32)

#define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x))
#define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x)))

static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) {
ASSERT_IS_ALIGNED((uintptr_t)ptr, 8);
ASSERT_IS_ALIGNED((uintptr_t)out, 8);
Expand Down
50 changes: 46 additions & 4 deletions src/utils/utils_math.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Expand All @@ -11,16 +11,58 @@
#define UMF_MATH_H 1

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

size_t getLeftmostSetBitPos(size_t num);
#if defined(_WIN32)

// Logarithm is an index of the most significant non-zero bit.
static inline size_t log2Utils(size_t num) { return getLeftmostSetBitPos(num); }
#include "utils_windows_intrin.h"

#pragma intrinsic(_BitScanReverse64)
#pragma intrinsic(_BitScanForward64)

// Returns the zero-based position of the leftmost (most significant) set
// bit of `num`, e.g. 9 for 01000011110.
// `num` must be non-zero (checked by assert).
static inline size_t utils_msb64(uint64_t num) {
    assert(num != 0 &&
           "Finding leftmost set bit when number equals zero is undefined");

    unsigned long msb_pos = 0;
    // the intrinsic's own return value is not needed here
    (void)_BitScanReverse64(&msb_pos, num);
    return (size_t)msb_pos;
}

// Returns the zero-based position of the rightmost (least significant) set
// bit of `num`.
// `num` must be non-zero (checked by assert).
static inline size_t utils_lsb64(uint64_t num) {
    assert(num != 0 &&
           "Finding rightmost set bit when number equals zero is undefined");

    unsigned long lsb_pos = 0;
    // the intrinsic's own return value is not needed here
    (void)_BitScanForward64(&lsb_pos, num);
    return (size_t)lsb_pos;
}

#else // !defined(_WIN32)

// Returns the zero-based position of the leftmost (most significant) set
// bit of `num`; bit 0 is the least significant bit.
// Example: for 01000011110 the result is 9.
// `num` must be non-zero (checked by assert).
static inline size_t utils_msb64(uint64_t num) {
    assert(num != 0 &&
           "Finding leftmost set bit when number equals zero is undefined");

    // a 64-bit value with k leading zero bits has its top set bit at 63 - k
    const int leading_zeros = __builtin_clzll(num);
    return (size_t)(63 - leading_zeros);
}

// Returns the zero-based position of the rightmost (least significant) set
// bit of `num`; bit 0 is the least significant bit.
// `num` must be non-zero (checked by assert).
static inline size_t utils_lsb64(uint64_t num) {
    assert(num != 0 &&
           "Finding rightmost set bit when number equals zero is undefined");

    // the number of trailing zero bits equals the index of the lowest set bit
    const int trailing_zeros = __builtin_ctzll(num);
    return (size_t)trailing_zeros;
}

#endif // !defined(_WIN32)

#ifdef __cplusplus
}
Expand Down
20 changes: 0 additions & 20 deletions src/utils/utils_posix_math.c

This file was deleted.

Loading