Skip to content

Track first free atbs for multiple block sizes instead of just 1 #2614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 50 additions & 31 deletions py/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,13 @@ void gc_init(void *start, void *end) {
#endif

// Set first free ATB index to the start of the heap.
MP_STATE_MEM(gc_first_free_atb_index) = 0;
for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) {
MP_STATE_MEM(gc_first_free_atb_index)[i] = 0;
}

// Set last free ATB index to the end of the heap.
MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;

// Set the lowest long lived ptr to the end of the heap to start. This will be lowered as long
// lived objects are allocated.
MP_STATE_MEM(gc_lowest_long_lived_ptr) = (void*) PTR_FROM_BLOCK(MP_STATE_MEM(gc_alloc_table_byte_len * BLOCKS_PER_ATB));
Expand Down Expand Up @@ -387,7 +391,9 @@ void gc_collect_root(void **ptrs, size_t len) {
void gc_collect_end(void) {
gc_deal_with_stack_overflow();
gc_sweep();
MP_STATE_MEM(gc_first_free_atb_index) = 0;
for (size_t i = 0; i < MICROPY_ATB_INDICES; i++) {
MP_STATE_MEM(gc_first_free_atb_index)[i] = 0;
}
MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
MP_STATE_MEM(gc_lock_depth)--;
GC_EXIT();
Expand Down Expand Up @@ -513,14 +519,16 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {
size_t crossover_block = BLOCK_FROM_PTR(MP_STATE_MEM(gc_lowest_long_lived_ptr));
while (keep_looking) {
int8_t direction = 1;
size_t start = MP_STATE_MEM(gc_first_free_atb_index);
size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1;
size_t first_free = MP_STATE_MEM(gc_first_free_atb_index)[bucket];
size_t start = first_free;
if (long_lived) {
direction = -1;
start = MP_STATE_MEM(gc_last_free_atb_index);
}
n_free = 0;
// look for a run of n_blocks available blocks
for (size_t i = start; keep_looking && MP_STATE_MEM(gc_first_free_atb_index) <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
for (size_t i = start; keep_looking && first_free <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
byte a = MP_STATE_MEM(gc_alloc_table_start)[i];
// Four ATB states are packed into a single byte.
int j = 0;
Expand Down Expand Up @@ -565,22 +573,24 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {

// Found free space ending at found_block inclusive.
// Also, set last free ATB index to block after last block we found, for start of
// next scan. To reduce fragmentation, we only do this if we were looking
// for a single free block, which guarantees that there are no free blocks
// before this one. Also, whenever we free or shrink a block we must check
// if this index needs adjusting (see gc_realloc and gc_free).
// next scan. Also, whenever we free or shrink a block we must check if this index needs
// adjusting (see gc_realloc and gc_free).
if (!long_lived) {
end_block = found_block;
start_block = found_block - n_free + 1;
if (n_blocks == 1) {
MP_STATE_MEM(gc_first_free_atb_index) = (found_block + 1) / BLOCKS_PER_ATB;
if (n_blocks < MICROPY_ATB_INDICES) {
size_t next_free_atb = (found_block + n_blocks) / BLOCKS_PER_ATB;
// Update all atb indices for larger blocks too.
for (size_t i = n_blocks - 1; i < MICROPY_ATB_INDICES; i++) {
MP_STATE_MEM(gc_first_free_atb_index)[i] = next_free_atb;
}
}
} else {
start_block = found_block;
end_block = found_block + n_free - 1;
if (n_blocks == 1) {
MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
}
// Always update the bounds of the long lived area because we assume it is contiguous. (It
// can still be reset by a sweep.)
MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
}

#ifdef LOG_HEAP_ACTIVITY
Expand Down Expand Up @@ -676,30 +686,37 @@ void gc_free(void *ptr) {
}
// get the GC block number corresponding to this pointer
assert(VERIFY_PTR(ptr));
size_t block = BLOCK_FROM_PTR(ptr);
assert(ATB_GET_KIND(block) == AT_HEAD);
size_t start_block = BLOCK_FROM_PTR(ptr);
assert(ATB_GET_KIND(start_block) == AT_HEAD);

#if MICROPY_ENABLE_FINALISER
FTB_CLEAR(block);
FTB_CLEAR(start_block);
#endif

// set the last_free pointer to this block if it's earlier in the heap
if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
MP_STATE_MEM(gc_first_free_atb_index) = block / BLOCKS_PER_ATB;
}
if (block / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB;
}

// free head and all of its tail blocks
#ifdef LOG_HEAP_ACTIVITY
gc_log_change(block, 0);
#endif
#ifdef LOG_HEAP_ACTIVITY
gc_log_change(start_block, 0);
#endif
size_t block = start_block;
do {
ATB_ANY_TO_FREE(block);
block += 1;
} while (ATB_GET_KIND(block) == AT_TAIL);

// Update the first free pointer for our size only. Not much calls gc_free directly so there
// is decent chance we'll want to allocate this size again. By only updating the specific
// size we don't risk something smaller fitting in.
size_t n_blocks = block - start_block;
size_t bucket = MIN(n_blocks, MICROPY_ATB_INDICES) - 1;
size_t new_free_atb = start_block / BLOCKS_PER_ATB;
if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) {
MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb;
}
// set the last_free pointer to this block if it's earlier in the heap
if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) {
MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb;
}

GC_EXIT();

#if EXTENSIVE_HEAP_PROFILING
Expand Down Expand Up @@ -870,11 +887,13 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
}

// set the last_free pointer to end of this block if it's earlier in the heap
if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
MP_STATE_MEM(gc_first_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
size_t new_free_atb = (block + new_blocks) / BLOCKS_PER_ATB;
size_t bucket = MIN(n_blocks - new_blocks, MICROPY_ATB_INDICES) - 1;
if (new_free_atb < MP_STATE_MEM(gc_first_free_atb_index)[bucket]) {
MP_STATE_MEM(gc_first_free_atb_index)[bucket] = new_free_atb;
}
if ((block + new_blocks) / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
if (new_free_atb > MP_STATE_MEM(gc_last_free_atb_index)) {
MP_STATE_MEM(gc_last_free_atb_index) = new_free_atb;
}

GC_EXIT();
Expand Down
8 changes: 8 additions & 0 deletions py/mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,14 @@
#define alloca(x) m_malloc(x)
#endif

// Number of atb indices to cache. Allocations of fewer blocks will be faster
// because the search will be accelerated by the index cache. This only applies
// to short lived allocations because we assume the long lived allocations are
// contiguous.
#ifndef MICROPY_ATB_INDICES
#define MICROPY_ATB_INDICES (8)
#endif

/*****************************************************************************/
/* MicroPython emitters */

Expand Down
2 changes: 1 addition & 1 deletion py/mpstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ typedef struct _mp_state_mem_t {
size_t gc_alloc_threshold;
#endif

size_t gc_first_free_atb_index;
size_t gc_first_free_atb_index[MICROPY_ATB_INDICES];
size_t gc_last_free_atb_index;

#if MICROPY_PY_GC_COLLECT_RETVAL
Expand Down
2 changes: 1 addition & 1 deletion tools/gc_activity.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ correct port. GDB is usually :3333 and JLink is :2331.
Now, run gdb from your port directory:

```
arm-none-eabi-gdb -x ../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf
arm-none-eabi-gdb -x ../../tools/output_gc_until_repl.txt build-metro_m0_express/firmware.elf
```

This will take a little time while it breaks, backtraces and continues for every
Expand Down
141 changes: 141 additions & 0 deletions tools/gc_activity_between_collects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import sys
import json

# Map start block to current allocation info.
current_heap = {}
allocation_history = []
root = {}

def change_root(trace, size):
level = root
for frame in reversed(trace):
file_location = frame[1]
if file_location not in level:
level[file_location] = {"blocks": 0,
"file": file_location,
"function": frame[2],
"subcalls": {}}
level[file_location]["blocks"] += size
level = level[file_location]["subcalls"]

total_actions = 0
non_single_block_streak = 0
max_nsbs = 0
last_action = None
last_total_actions = 0
count = 0
actions = {}
last_ticks_ms = 0
ticks_ms = 0
block_sizes = {}
allocation_sources = {}
with open(sys.argv[1], "r") as f:
for line in f:
if not line.strip():
break
for line in f:
action = None
if line.startswith("Breakpoint 2"):
break
next(f) # throw away breakpoint code line
# print(next(f)) # first frame
block = 0
size = 0
trace = []
for line in f:
# print(line.strip())
if line[0] == "#":
frame = line.strip().split()
if frame[1].startswith("0x"):
trace.append((frame[1], frame[-1], frame[3]))
else:
trace.append(("0x0", frame[-1], frame[1]))
elif line[0] == "$":
#print(line.strip().split()[-1])
block = int(line.strip().split()[-1][2:], 16)
next_line = next(f)
size = int(next_line.strip().split()[-1][2:], 16)
# next_line = next(f)
# ticks_ms = int(next_line.strip().split()[-1][2:], 16)
if not line.strip():
break

action = "unknown"
if block not in current_heap:
current_heap[block] = {"start_block": block, "size": size, "start_trace": trace, "start_time": total_actions}
action = "alloc"
if size == 1:
max_nsbs = max(max_nsbs, non_single_block_streak)
non_single_block_streak = 0
else:
non_single_block_streak += 1
#change_root(trace, size)
if size not in block_sizes:
block_sizes[size] = 0
source = trace[-1][-1]
if source not in allocation_sources:
print(trace)
allocation_sources[source] = 0
allocation_sources[source] += 1
block_sizes[size] += 1
else:
alloc = current_heap[block]
alloc["end_trace"] = trace
alloc["end_time"] = total_actions
change_root(alloc["start_trace"], -1 * alloc["size"])
if size > 0:
action = "realloc"
current_heap[block] = {"start_block": block, "size": size, "start_trace": trace, "start_time": total_actions}
#change_root(trace, size)
else:
action = "free"
if trace[0][2] == "gc_sweep":
action = "sweep"
non_single_block_streak = 0
if (trace[3][2] == "py_gc_collect" or (trace[3][2] == "gc_deinit" and count > 1)) and last_action != "sweep":
print(ticks_ms - last_ticks_ms, total_actions - last_total_actions, "gc.collect", max_nsbs)
print(actions)
print(block_sizes)
print(allocation_sources)
actions = {}
block_sizes = {}
allocation_sources = {}
if count % 2 == 0:
print()
count += 1
last_total_actions = total_actions
last_ticks_ms = ticks_ms
max_nsbs = 0
del current_heap[block]
alloc["end_cause"] = action
allocation_history.append(alloc)
if action not in actions:
actions[action] = 0
actions[action] += 1
last_action = action
#print(total_actions, non_single_block_streak, action, block, size)
total_actions += 1
print(actions)
print(max_nsbs)
print()

for alloc in current_heap.values():
alloc["end_trace"] = ""
alloc["end_time"] = total_actions
allocation_history.append(alloc)

def print_frame(frame, indent=0):
for key in sorted(frame):
if not frame[key]["blocks"] or key.startswith("../py/malloc.c") or key.startswith("../py/gc.c"):
continue
print(" " * (indent - 1), key, frame[key]["function"], frame[key]["blocks"], "blocks")
print_frame(frame[key]["subcalls"], indent + 2)

# print_frame(root)
# total_blocks = 0
# for key in sorted(root):
# total_blocks += root[key]["blocks"]
# print(total_blocks, "total blocks")

# with open("allocation_history.json", "w") as f:
# json.dump(allocation_history, f)
15 changes: 11 additions & 4 deletions tools/output_gc_until_repl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,23 @@ set logging on
set remote hardware-breakpoint-limit 4

# gc log
break gc.c:103
break gc.c:106
commands
backtrace
p/x start_block
p/x length
append binary memory ram.bin &_srelocate &_estack
p/x ticks_ms
# backtrace output redirect is currently broken in gdb so we use up instead.
# https://sourceware.org/bugzilla/show_bug.cgi?id=23439
# backtrace
up
up
up
up
# append binary memory ram.bin &_srelocate &_estack
continue
end

break main.c:179
break main.c:251

continue

Expand Down