Skip to content

GH-113853: Guarantee forward progress in executors #113854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Include/cpython/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);

int
_PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer);
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);

extern _PyOptimizerObject _PyOptimizer_Default;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Guarantee that all executors make progress. This then guarantees that tier 2
execution always makes progress.
18 changes: 13 additions & 5 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2325,12 +2325,18 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
oparg >>= 8;
start--;
}
int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
// Rewind and enter the executor:
assert(this_instr->op.code == ENTER_EXECUTOR);
next_instr = this_instr;
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
}
else {
Expand Down Expand Up @@ -2370,10 +2376,12 @@ dummy_func(
GOTO_TIER_TWO();
}
else {
code->co_executors->executors[oparg & 255] = NULL;
/* ENTER_EXECUTOR will be the first code unit of the instruction */
assert(oparg < 256);
code->co_executors->executors[oparg] = NULL;
opcode = this_instr->op.code = executor->vm_data.opcode;
this_instr->op.arg = executor->vm_data.oparg;
oparg = (oparg & (~255)) | executor->vm_data.oparg;
oparg = executor->vm_data.oparg;
Py_DECREF(executor);
next_instr = this_instr;
DISPATCH_GOTO();
Expand Down
18 changes: 13 additions & 5 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

155 changes: 92 additions & 63 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
}

int
_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
{
assert(src->op.code == JUMP_BACKWARD);
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
assert(PyCode_Check(code));
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!has_space_for_executor(code, src)) {
if (!has_space_for_executor(code, start)) {
return 0;
}
_PyOptimizerObject *opt = interp->optimizer;
_PyExecutorObject *executor = NULL;
/* Start optimizing at the destination to guarantee forward progress */
int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
if (err <= 0) {
assert(executor == NULL);
return err;
}
int index = get_index_for_executor(code, src);
int index = get_index_for_executor(code, start);
if (index < 0) {
/* Out of memory. Don't raise and assume that the
* error will show up elsewhere.
Expand All @@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
Py_DECREF(executor);
return 0;
}
insert_executor(code, src, index, executor);
insert_executor(code, start, index, executor);
Py_DECREF(executor);
return 1;
}
Expand Down Expand Up @@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333

/* Returns 1 on success,
* 0 if it failed to produce a worthwhile trace,
* and -1 on an error.
*/
static int
translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int buffer_size,
_PyBloomFilter *dependencies)
{
PyCodeObject *initial_code = code;
_Py_BloomFilter_Add(dependencies, initial_code);
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
struct {
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions

#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
#endif

#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { printf(__VA_ARGS__); }
Expand Down Expand Up @@ -403,13 +369,47 @@ translate_bytecode_to_trace(
code = trace_stack[trace_stack_depth].code; \
instr = trace_stack[trace_stack_depth].instr;

/* Returns 1 on success,
* 0 if it failed to produce a worthwhile trace,
* and -1 on an error.
*/
static int
translate_bytecode_to_trace(
PyCodeObject *code,
_Py_CODEUNIT *instr,
_PyUOpInstruction *trace,
int buffer_size,
_PyBloomFilter *dependencies)
{
bool progress_needed = true;
PyCodeObject *initial_code = code;
_Py_BloomFilter_Add(dependencies, initial_code);
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
struct {
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions

#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
if (python_lltrace != NULL && *python_lltrace >= '0') {
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
}
#endif

DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
uint32_t target = 0;

top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
Expand All @@ -421,6 +421,15 @@ translate_bytecode_to_trace(
uint32_t oparg = instr->op.arg;
uint32_t extended = 0;

if (opcode == ENTER_EXECUTOR) {
assert(oparg < 256);
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = executor->vm_data.oparg;
}

if (opcode == EXTENDED_ARG) {
instr++;
extended = 1;
Expand All @@ -431,13 +440,23 @@ translate_bytecode_to_trace(
goto done;
}
}

if (opcode == ENTER_EXECUTOR) {
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);

/* Special case the first instruction,
* so that we can guarantee forward progress */
if (progress_needed) {
progress_needed = false;
if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
initial_instr = instr;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. This will affect some of the debug output, notably the "Created a trace for ..." message below at line 676 will mention a different byte offset than the "Optimizing ..." above at line 405. I honestly like what you set it to here better, and debug level 4 is excessively verbose, so I don't care, but some bit of me will miss the correspondence. :-)

continue;
}
else {
if (OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_DEOPT(opcode));
}
}

switch (opcode) {
Expand Down Expand Up @@ -480,7 +499,9 @@ translate_bytecode_to_trace(
case JUMP_BACKWARD:
case JUMP_BACKWARD_NO_INTERRUPT:
{
if (instr + 2 - oparg == initial_instr && code == initial_code) {
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
if (target == initial_instr) {
/* We have looped round to the start */
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
}
Expand Down Expand Up @@ -641,35 +662,33 @@ translate_bytecode_to_trace(
}
assert(code == initial_code);
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
if (trace_length > 4) {
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
DPRINTF(1,
"Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
trace_length);
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
return 1;
}
else {
if (progress_needed || trace_length < 5) {
OPT_STAT_INC(trace_too_short);
DPRINTF(4,
"No trace for %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
return 0;
}
return 0;
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
DPRINTF(1,
"Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
trace_length);
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
return 1;
}

#undef RESERVE
#undef RESERVE_RAW
#undef INSTR_IP
#undef ADD_TO_TRACE
#undef DPRINTF
}

#define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
Expand Down Expand Up @@ -854,10 +873,20 @@ counter_optimize(
int Py_UNUSED(curr_stackentries)
)
{
int oparg = instr->op.arg;
while (instr->op.code == EXTENDED_ARG) {
instr++;
oparg = (oparg << 8) | instr->op.arg;
}
if (instr->op.code != JUMP_BACKWARD) {
/* Counter optimizer can only handle backward edges */
return 0;
}
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
_PyUOpInstruction buffer[3] = {
{ .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
};
_PyBloomFilter empty;
_Py_BloomFilter_Init(&empty);
Expand Down