Skip to content

Commit 36e6cc8

Browse files
committed
Track first free atb for multiple block sizes.
This speeds up allocation in cases where no single-block allocations happen while Python code is allocating blocks of other sizes.
1 parent a4d197a commit 36e6cc8

File tree

3 files changed

+59
-32
lines changed

3 files changed

+59
-32
lines changed

py/gc.c

Lines changed: 50 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,13 @@ void gc_init(void *start, void *end) {
150150
#endif
151151

152152
// Set first free ATB index to the start of the heap.
153-
MP_STATE_MEM(gc_first_free_atb_index) = 0;
153+
for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) {
154+
MP_STATE_MEM(gc_first_free_atb_index)[i] = 0;
155+
}
156+
154157
// Set last free ATB index to the end of the heap.
155158
MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
159+
156160
// Set the lowest long lived ptr to the end of the heap to start. This will be lowered as long
157161
// lived objects are allocated.
158162
MP_STATE_MEM(gc_lowest_long_lived_ptr) = (void*) PTR_FROM_BLOCK(MP_STATE_MEM(gc_alloc_table_byte_len * BLOCKS_PER_ATB));
@@ -387,7 +391,9 @@ void gc_collect_root(void **ptrs, size_t len) {
387391
void gc_collect_end(void) {
388392
gc_deal_with_stack_overflow();
389393
gc_sweep();
390-
MP_STATE_MEM(gc_first_free_atb_index) = 0;
394+
for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) {
395+
MP_STATE_MEM(gc_first_free_atb_index)[i] = 0;
396+
}
391397
MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
392398
MP_STATE_MEM(gc_lock_depth)--;
393399
GC_EXIT();
@@ -513,14 +519,16 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {
513519
size_t crossover_block = BLOCK_FROM_PTR(MP_STATE_MEM(gc_lowest_long_lived_ptr));
514520
while (keep_looking) {
515521
int8_t direction = 1;
516-
size_t start = MP_STATE_MEM(gc_first_free_atb_index);
522+
size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1;
523+
size_t first_free = MP_STATE_MEM(gc_first_free_atb_index)[bucket];
524+
size_t start = first_free;
517525
if (long_lived) {
518526
direction = -1;
519527
start = MP_STATE_MEM(gc_last_free_atb_index);
520528
}
521529
n_free = 0;
522530
// look for a run of n_blocks available blocks
523-
for (size_t i = start; keep_looking && MP_STATE_MEM(gc_first_free_atb_index) <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
531+
for (size_t i = start; keep_looking && first_free <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
524532
byte a = MP_STATE_MEM(gc_alloc_table_start)[i];
525533
// Four ATB states are packed into a single byte.
526534
int j = 0;
@@ -565,22 +573,24 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {
565573

566574
// Found free space ending at found_block inclusive.
567575
// Also, set last free ATB index to block after last block we found, for start of
568-
// next scan. To reduce fragmentation, we only do this if we were looking
569-
// for a single free block, which guarantees that there are no free blocks
570-
// before this one. Also, whenever we free or shrink a block we must check
571-
// if this index needs adjusting (see gc_realloc and gc_free).
576+
// next scan. Also, whenever we free or shrink a block we must check if this index needs
577+
// adjusting (see gc_realloc and gc_free).
572578
if (!long_lived) {
573579
end_block = found_block;
574580
start_block = found_block - n_free + 1;
575-
if (n_blocks == 1) {
576-
MP_STATE_MEM(gc_first_free_atb_index) = (found_block + 1) / BLOCKS_PER_ATB;
581+
if (n_blocks < MICROPY_ATB_INDICES) {
582+
size_t next_free_atb = (found_block + n_blocks) / BLOCKS_PER_ATB;
583+
// Update all atb indices for larger blocks too.
584+
for (size_t i = n_blocks - 1; i < MICROPY_ATB_INDICES; i++) {
585+
MP_STATE_MEM(gc_first_free_atb_index)[i] = next_free_atb;
586+
}
577587
}
578588
} else {
579589
start_block = found_block;
580590
end_block = found_block + n_free - 1;
581-
if (n_blocks == 1) {
582-
MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
583-
}
591+
// Always update the bounds of the long lived area because we assume it is contiguous. (It
592+
// can still be reset by a sweep.)
593+
MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
584594
}
585595

586596
#ifdef LOG_HEAP_ACTIVITY
@@ -676,30 +686,37 @@ void gc_free(void *ptr) {
676686
}
677687
// get the GC block number corresponding to this pointer
678688
assert(VERIFY_PTR(ptr));
679-
size_t block = BLOCK_FROM_PTR(ptr);
680-
assert(ATB_GET_KIND(block) == AT_HEAD);
689+
size_t start_block = BLOCK_FROM_PTR(ptr);
690+
assert(ATB_GET_KIND(start_block) == AT_HEAD);
681691

682692
#if MICROPY_ENABLE_FINALISER
683-
FTB_CLEAR(block);
693+
FTB_CLEAR(start_block);
684694
#endif
685695

686-
// set the last_free pointer to this block if it's earlier in the heap
687-
if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
688-
MP_STATE_MEM(gc_first_free_atb_index) = block / BLOCKS_PER_ATB;
689-
}
690-
if (block / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
691-
MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB;
692-
}
693-
694696
// free head and all of its tail blocks
695-
#ifdef LOG_HEAP_ACTIVITY
696-
gc_log_change(block, 0);
697-
#endif
697+
#ifdef LOG_HEAP_ACTIVITY
698+
gc_log_change(start_block, 0);
699+
#endif
700+
size_t block = start_block;
698701
do {
699702
ATB_ANY_TO_FREE(block);
700703
block += 1;
701704
} while (ATB_GET_KIND(block) == AT_TAIL);
702705

706+
// Update the first free pointer for our size only. Few things call gc_free directly, so
707+
// there is a decent chance we'll want to allocate this size again. By only updating the
708+
// specific size we don't risk something smaller fitting in.
709+
size_t n_blocks = block - start_block;
710+
size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1;
711+
size_t new_free_atb = start_block / BLOCKS_PER_ATB;
712+
if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) {
713+
MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb;
714+
}
715+
// set the last_free pointer to this block if it's later in the heap
716+
if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) {
717+
MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb;
718+
}
719+
703720
GC_EXIT();
704721

705722
#if EXTENSIVE_HEAP_PROFILING
@@ -870,11 +887,13 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
870887
}
871888

872889
// update the first/last free atb indices to cover the tail freed by shrinking this block
873-
if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
874-
MP_STATE_MEM(gc_first_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
890+
size_t new_free_atb = (block + new_blocks) / BLOCKS_PER_ATB;
891+
size_t bucket = MIN(n_blocks - new_blocks, MICROPY_ATB_INDICES) - 1;
892+
if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) {
893+
MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb;
875894
}
876-
if ((block + new_blocks) / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
877-
MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
895+
if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) {
896+
MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb;
878897
}
879898

880899
GC_EXIT();

py/mpconfig.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,14 @@
244244
#define alloca(x) m_malloc(x)
245245
#endif
246246

247+
// Number of atb indices to cache. Allocations of fewer blocks will be faster
248+
// because the search will be accelerated by the index cache. This only applies
249+
// to short lived allocations because we assume the long lived allocations are
250+
// contiguous.
251+
#ifndef MICROPY_ATB_INDICES
252+
#define MICROPY_ATB_INDICES (8)
253+
#endif
254+
247255
/*****************************************************************************/
248256
/* MicroPython emitters */
249257

py/mpstate.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ typedef struct _mp_state_mem_t {
9292
size_t gc_alloc_threshold;
9393
#endif
9494

95-
size_t gc_first_free_atb_index;
95+
size_t gc_first_free_atb_index[MICROPY_ATB_INDICES];
9696
size_t gc_last_free_atb_index;
9797

9898
#if MICROPY_PY_GC_COLLECT_RETVAL

0 commit comments

Comments
 (0)