Skip to content

Commit 9cda14b

Browse files
committed
Remove redundant SET_IP and _CHECK_VALIDITY micro-ops
1 parent bb9a1a2 commit 9cda14b

File tree

3 files changed

+25
-34
lines changed

3 files changed

+25
-34
lines changed

Python/ceval.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,11 +1066,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10661066
deoptimize:
10671067
// On DEOPT_IF we just repeat the last instruction.
10681068
// This presumes nothing was popped from the stack (nor pushed).
1069+
//assert(next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable) == frame->instr_ptr);
10691070
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 " @ %d]\n", opcode, operand, (int)(next_uop-current_executor->trace-1));
10701071
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
10711072
UOP_STAT_INC(opcode, miss);
10721073
frame->return_offset = 0; // Dispatch to frame->instr_ptr
10731074
_PyFrame_SetStackPointer(frame, stack_pointer);
1075+
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable);
10741076
Py_DECREF(current_executor);
10751077
// Fall through
10761078
// Jump here from ENTER_EXECUTOR
@@ -1081,6 +1083,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10811083
// Jump here from _EXIT_TRACE
10821084
exit_trace:
10831085
_PyFrame_SetStackPointer(frame, stack_pointer);
1086+
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable);
10841087
Py_DECREF(current_executor);
10851088
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
10861089
goto enter_tier_one;

Python/optimizer.c

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -465,18 +465,6 @@ translate_bytecode_to_trace(
465465
#define INSTR_IP(INSTR, CODE) \
466466
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
467467

468-
#define ADD_TO_STUB(INDEX, OPCODE, OPARG, OPERAND) \
469-
DPRINTF(2, " ADD_TO_STUB(%d, %s, %d, %" PRIu64 ")\n", \
470-
(INDEX), \
471-
uop_name(OPCODE), \
472-
(OPARG), \
473-
(uint64_t)(OPERAND)); \
474-
assert(reserved > 0); \
475-
reserved--; \
476-
trace[(INDEX)].opcode = (OPCODE); \
477-
trace[(INDEX)].oparg = (OPARG); \
478-
trace[(INDEX)].operand = (OPERAND);
479-
480468
// Reserve space for n uops
481469
#define RESERVE_RAW(n, opname) \
482470
if (trace_length + (n) > max_length) { \
@@ -485,7 +473,7 @@ translate_bytecode_to_trace(
485473
OPT_STAT_INC(trace_too_long); \
486474
goto done; \
487475
} \
488-
reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest
476+
reserved = (n); // Keep ADD_TO_TRACE honest
489477

490478
// Reserve space for main+stub uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
491479
#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 3, uop_name(opcode))
@@ -495,7 +483,7 @@ translate_bytecode_to_trace(
495483
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
496484
DPRINTF(2, "Trace stack overflow\n"); \
497485
OPT_STAT_INC(trace_stack_overflow); \
498-
ADD_TO_TRACE(_SET_IP, 0, 0, 0); \
486+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
499487
goto done; \
500488
} \
501489
trace_stack[trace_stack_depth].code = code; \
@@ -515,17 +503,19 @@ translate_bytecode_to_trace(
515503
PyUnicode_AsUTF8(code->co_filename),
516504
code->co_firstlineno,
517505
2 * INSTR_IP(initial_instr, code));
518-
506+
uint32_t target = 0;
519507
top: // Jump here after _PUSH_FRAME or likely branches
520508
for (;;) {
509+
target = INSTR_IP(instr, code);
521510
RESERVE_RAW(3, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
522-
ADD_TO_TRACE(_SET_IP, INSTR_IP(instr, code), 0, 0);
523-
ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, INSTR_IP(instr, code));
511+
ADD_TO_TRACE(_SET_IP, target, 0, target);
512+
ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
524513

525514
uint32_t opcode = instr->op.code;
526515
uint32_t oparg = instr->op.arg;
527516
uint32_t extras = 0;
528517

518+
529519
if (opcode == EXTENDED_ARG) {
530520
instr++;
531521
extras += 1;
@@ -560,7 +550,7 @@ translate_bytecode_to_trace(
560550
DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n",
561551
uop_name(opcode), oparg,
562552
counter, bitcount, jump_likely, uop_name(uopcode));
563-
ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(instr, code));
553+
ADD_TO_TRACE(uopcode, max_length, 0, target);
564554
if (jump_likely) {
565555
_Py_CODEUNIT *target_instr = next_instr + oparg;
566556
DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n",
@@ -659,7 +649,7 @@ translate_bytecode_to_trace(
659649
expansion->uops[i].offset);
660650
Py_FatalError("garbled expansion");
661651
}
662-
ADD_TO_TRACE(uop, oparg, operand, INSTR_IP(instr, code));
652+
ADD_TO_TRACE(uop, oparg, operand, target);
663653
if (uop == _POP_FRAME) {
664654
TRACE_STACK_POP();
665655
DPRINTF(2,
@@ -688,15 +678,15 @@ translate_bytecode_to_trace(
688678
PyUnicode_AsUTF8(new_code->co_filename),
689679
new_code->co_firstlineno);
690680
OPT_STAT_INC(recursive_call);
691-
ADD_TO_TRACE(_SET_IP, 0, 0, 0);
681+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
692682
goto done;
693683
}
694684
if (new_code->co_version != func_version) {
695685
// func.__code__ was updated.
696686
// Perhaps it may happen again, so don't bother tracing.
697687
// TODO: Reason about this -- is it better to bail or not?
698688
DPRINTF(2, "Bailing because co_version != func_version\n");
699-
ADD_TO_TRACE(_SET_IP, 0, 0, 0);
689+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
700690
goto done;
701691
}
702692
// Increment IP to the return address
@@ -713,7 +703,7 @@ translate_bytecode_to_trace(
713703
2 * INSTR_IP(instr, code));
714704
goto top;
715705
}
716-
ADD_TO_TRACE(_SET_IP, 0, 0, 0);
706+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
717707
goto done;
718708
}
719709
}
@@ -738,7 +728,7 @@ translate_bytecode_to_trace(
738728
assert(code == initial_code);
739729
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
740730
if (trace_length > 4) {
741-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, INSTR_IP(instr, code));
731+
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
742732
DPRINTF(1,
743733
"Created a trace for %s (%s:%d) at byte offset %d -- length %d+%d\n",
744734
PyUnicode_AsUTF8(code->co_qualname),

Python/optimizer_analysis.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,15 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
1717
{
1818
// Note that we don't enter stubs, those SET_IPs are needed.
1919
int last_set_ip = -1;
20-
bool need_ip = true;
2120
bool maybe_invalid = false;
2221
for (int pc = 0; pc < buffer_size; pc++) {
2322
int opcode = buffer[pc].opcode;
2423
if (opcode == _SET_IP) {
25-
if (!need_ip && last_set_ip >= 0) {
26-
buffer[last_set_ip].opcode = NOP;
27-
}
28-
need_ip = false;
24+
buffer[pc].opcode = NOP;
2925
last_set_ip = pc;
3026
}
3127
else if (opcode == _CHECK_VALIDITY) {
3228
if (maybe_invalid) {
33-
/* Exiting the trace requires that IP is correct */
34-
need_ip = true;
3529
maybe_invalid = false;
3630
}
3731
else {
@@ -42,12 +36,16 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
4236
break;
4337
}
4438
else {
45-
// If opcode has ERROR or DEOPT, set need_ip to true
46-
if (_PyOpcode_opcode_metadata[opcode].flags & (HAS_ERROR_FLAG | HAS_DEOPT_FLAG) || opcode == _PUSH_FRAME) {
47-
need_ip = true;
48-
}
4939
if (_PyOpcode_opcode_metadata[opcode].flags & HAS_ESCAPES_FLAG) {
5040
maybe_invalid = true;
41+
if (last_set_ip >= 0) {
42+
buffer[last_set_ip].opcode = _SET_IP;
43+
}
44+
}
45+
if ((_PyOpcode_opcode_metadata[opcode].flags & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
46+
if (last_set_ip >= 0) {
47+
buffer[last_set_ip].opcode = _SET_IP;
48+
}
5149
}
5250
}
5351
}

0 commit comments

Comments
 (0)