Skip to content

Commit 5e73b74

Browse files
WIP: Specialize CALL_FUNCTION for builtins
1 parent 30f7a77 commit 5e73b74

File tree

6 files changed

+210
-2
lines changed

6 files changed

+210
-2
lines changed

Include/internal/pycore_code.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ typedef struct {
5353
uint32_t builtin_keys_version;
5454
} _PyLoadGlobalCache;
5555

56+
/* Second cache entry of the CALL_FUNCTION family: remembers the C function
 * pointer that the call site was specialized for. */
typedef struct {
57+
union {
58+
/* C implementation captured by _Py_Specialize_CallFunction;
 * CALL_FUNCTION_BUILTIN deoptimizes when the callable's pointer differs. */
PyCFunction cfunc;
59+
/* TODO: func_version field for Python function calls */
60+
uint64_t _; /* Forces the entry to 8 bytes where pointers are 32-bit */
61+
};
62+
} _PyCallFunctionCache;
63+
5664
/* Add specialized versions of entries to this union.
5765
*
5866
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -68,6 +76,7 @@ typedef union {
6876
_PyAdaptiveEntry adaptive;
6977
_PyLoadAttrCache load_attr;
7078
_PyLoadGlobalCache load_global;
79+
_PyCallFunctionCache call_function;
7180
} SpecializedCacheEntry;
7281

7382
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
@@ -319,10 +328,25 @@ cache_backoff(_PyAdaptiveEntry *entry) {
319328
entry->counter = BACKOFF;
320329
}
321330

331+
/* Calling conventions a builtin's C function pointer can have, as recorded in
the adaptive cache entry's `index` field by _Py_Specialize_CallFunction.
Corresponds to various function pointers
332+
https://docs.python.org/3/c-api/structures.html#implementing-functions-and-methods
333+
*/
334+
typedef enum {
335+
PYCFUNCTION = 1, /* never recorded by _Py_Specialize_CallFunction */
336+
PYCFUNCTION_O = 2, /* METH_O */
337+
PYCFUNCTION_NOARGS = 3, /* METH_NOARGS */
338+
PYCFUNCTION_WITH_KEYWORDS = 4, /* METH_VARARGS, with or without METH_KEYWORDS */
339+
_PYCFUNCTION_FAST = 5, /* METH_FASTCALL */
340+
_PYCFUNCTION_FAST_WITH_KEYWORDS = 6, /* METH_FASTCALL | METH_KEYWORDS */
341+
PYCMETHOD = 7, /* METH_METHOD | METH_FASTCALL | METH_KEYWORDS. Isn't normally used in builtins. */
342+
} _BuiltinCallKinds;
343+
322344
/* Specialization functions */
323345

324346
int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
325347
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
348+
/* Try to specialize the CALL_FUNCTION instruction at `instr`. The callable is
 * read from stack_pointer[-(original_oparg + 1)] and is only specialized when
 * it is the object stored under its own name in `builtins`. `cache` is the
 * instruction's adaptive entry; cache[-1] receives the function pointer.
 * Returns 0 both when the instruction was rewritten and when it was not. */
int _Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg,
349+
PyObject *builtins, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
326350

327351
#define SPECIALIZATION_STATS 0
328352
#define SPECIALIZATION_STATS_DETAILED 0

Include/opcode.h

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/opcode.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,4 +229,6 @@ def jabs_op(name, op):
229229
"LOAD_GLOBAL_ADAPTIVE",
230230
"LOAD_GLOBAL_MODULE",
231231
"LOAD_GLOBAL_BUILTIN",
232+
"CALL_FUNCTION_ADAPTIVE",
233+
"CALL_FUNCTION_BUILTIN",
232234
]

Python/ceval.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ Py_LOCAL_INLINE(PyObject *) call_function(
5555
static PyObject * do_call_core(
5656
PyThreadState *tstate, PyObject *func,
5757
PyObject *callargs, PyObject *kwdict, int use_tracing);
58+
/* Forward declaration: call path used by CALL_FUNCTION_BUILTIN for builtins
 * called without keyword arguments. The definition appears later in this file. */
Py_LOCAL_INLINE(PyObject *) call_function_builtin(
59+
PyThreadState *tstate,
60+
_PyAdaptiveEntry *cache0,
61+
_PyCallFunctionCache *cache1,
62+
PyObject ***pp_stack,
63+
Py_ssize_t oparg, int use_tracing);
5864

5965
#ifdef LLTRACE
6066
static int lltrace;
@@ -4066,7 +4072,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
40664072
CHECK_EVAL_BREAKER();
40674073
DISPATCH();
40684074
}
4075+
case TARGET(CALL_FUNCTION_ADAPTIVE): {
    /* Adaptive form of CALL_FUNCTION. While the counter is non-zero the
       generic instruction is executed; once it reaches zero the specializer
       gets a chance to rewrite this instruction in place. */
    SpecializedCacheEntry *cache = GET_CACHE();
    if (cache->adaptive.counter == 0) {
        /* Step back so that `next_instr` addresses this instruction: the
           specializer rewrites it and DISPATCH() then re-executes whatever
           opcode is stored there afterwards. */
        next_instr--;
        if (_Py_Specialize_CallFunction(stack_pointer,
                                        cache->adaptive.original_oparg,
                                        BUILTINS(), next_instr, cache) < 0) {
            goto error;
        }
        DISPATCH();
    }
    else {
        STAT_INC(CALL_FUNCTION, deferred);
        cache->adaptive.counter--;
        /* The generic instruction needs the real argument count, which the
           adaptive entry keeps in original_oparg. */
        oparg = cache->adaptive.original_oparg;
        JUMP_TO_INSTRUCTION(CALL_FUNCTION);
    }
}
4092+
case TARGET(CALL_FUNCTION_BUILTIN): {
    /* Builtin functions, WITHOUT keywords */
    SpecializedCacheEntry *caches = GET_CACHE();
    _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
    _PyCallFunctionCache *cache1 = &caches[-1].call_function;
    PyObject *callable = PEEK(cache0->original_oparg + 1);

    /* Guards: the callable must still be an exact builtin function object
       wrapping the C function this site was specialized for. Nothing has
       been consumed from the stack yet, so deoptimizing here is safe. */
    DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION);
    DEOPT_IF(PyCFunction_GET_FUNCTION(callable) != cache1->cfunc, CALL_FUNCTION);

    PyObject **sp = stack_pointer;
    PyObject *res = call_function_builtin(tstate, cache0, cache1, &sp,
                                          cache0->original_oparg,
                                          cframe.use_tracing);
    stack_pointer = sp;
    PUSH(res);
    /* The callable and its arguments are gone from the stack at this point,
       so a failed call must be reported as an error. Deoptimizing would
       re-run CALL_FUNCTION against a stack that no longer holds the call. */
    if (res == NULL) {
        goto error;
    }
    record_cache_hit(cache0);
    STAT_INC(CALL_FUNCTION, hit);
    /* NOTE(review): added to match the generic call case preceding this one,
       which checks the eval breaker before dispatching. */
    CHECK_EVAL_BREAKER();
    DISPATCH();
}
40704112
case TARGET(CALL_FUNCTION_KW): {
40714113
PyObject **sp, *res, *names;
40724114

@@ -4297,6 +4339,7 @@ opname ## _miss: \
42974339

42984340
MISS_WITH_CACHE(LOAD_ATTR)
42994341
MISS_WITH_CACHE(LOAD_GLOBAL)
4342+
MISS_WITH_CACHE(CALL_FUNCTION)
43004343

43014344
error:
43024345
/* Double-check exception status. */
@@ -5872,6 +5915,65 @@ do_call_core(PyThreadState *tstate,
58725915
return PyObject_Call(func, callargs, kwdict);
58735916
}
58745917

5918+
/* Fast alternative for non-keyword calls to builtins. */
5919+
Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
5920+
call_function_builtin(PyThreadState *tstate,
5921+
_PyAdaptiveEntry *cache0,
5922+
_PyCallFunctionCache *cache1,
5923+
PyObject ***pp_stack,
5924+
Py_ssize_t oparg,
5925+
int use_tracing)
5926+
{
5927+
#define MAYBE_TRACE(cfunc) if (use_tracing) {C_TRACE(x, cfunc);} else {x = cfunc;}
5928+
5929+
PyObject **pfunc = (*pp_stack) - oparg - 1;
5930+
PyObject *x, *w;
5931+
PyObject **stack = (*pp_stack) - oparg;
5932+
5933+
PyObject *func = *pfunc; /* Only for tracing purposes */
5934+
PyObject *self = PyCFunction_GET_SELF(func);
5935+
PyCFunction cfunc = PyCFunction_GET_FUNCTION(func);
5936+
5937+
switch ((_BuiltinCallKinds)cache0->index) {
5938+
case PYCFUNCTION_NOARGS:
5939+
case PYCFUNCTION_O: {
5940+
MAYBE_TRACE(cfunc(self, *stack));
5941+
break;
5942+
}
5943+
case _PYCFUNCTION_FAST: {
5944+
MAYBE_TRACE(((_PyCFunctionFast)cfunc)(self, stack, oparg));
5945+
break;
5946+
}
5947+
case _PYCFUNCTION_FAST_WITH_KEYWORDS: {
5948+
MAYBE_TRACE(((_PyCFunctionFastWithKeywords)cfunc)(self, stack, oparg, 0));
5949+
break;
5950+
}
5951+
case PYCFUNCTION_WITH_KEYWORDS: {
5952+
PyObject *args = _PyTuple_FromArray(stack, oparg);
5953+
if (args == NULL) {
5954+
break;
5955+
}
5956+
MAYBE_TRACE(((PyCFunctionWithKeywords)cfunc)(self, args, NULL));
5957+
Py_DECREF(args);
5958+
break;
5959+
}
5960+
/* Bulitins shouldn't have these flags */
5961+
case PYCFUNCTION:
5962+
case PYCMETHOD:
5963+
default:
5964+
Py_UNREACHABLE();
5965+
break;
5966+
}
5967+
assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
5968+
5969+
/* Clear the stack of the function object. */
5970+
while ((*pp_stack) > pfunc) {
5971+
w = EXT_POP(*pp_stack);
5972+
Py_DECREF(w);
5973+
}
5974+
5975+
return x;
5976+
}
58755977
/* Extract a slice index from a PyLong or an object with the
58765978
nb_index slot defined, and store in *pi.
58775979
Silently reduce values larger than PY_SSIZE_T_MAX to PY_SSIZE_T_MAX,

Python/opcode_targets.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/specialize.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,12 +158,14 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
158158
/* Maps a base opcode to the adaptive opcode of its family. Opcodes without
 * an initializer here are left at zero. */
static uint8_t adaptive_opcodes[256] = {
159159
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
160160
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
161+
[CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE,
161162
};
162163

163164
/* The number of cache entries required for a "family" of instructions. */
164165
/* Indexed by base opcode; opcodes without an initializer here are left at
 * zero. */
static uint8_t cache_requirements[256] = {
165166
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */
166167
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
168+
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry (cache[0]) and _PyCallFunctionCache (cache[-1]) */
167169
};
168170

169171
/* Return the oparg for the cache_offset and instruction index.
@@ -633,3 +635,79 @@ _Py_Specialize_LoadGlobal(
633635
cache0->counter = saturating_start();
634636
return 0;
635637
}
638+
639+
int
640+
_Py_Specialize_CallFunction(PyObject **stack_pointer, uint8_t original_oparg,
641+
PyObject *builtins, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
642+
{
643+
PyObject *callable = stack_pointer[-(original_oparg + 1)];
644+
_PyAdaptiveEntry *cache0 = &cache->adaptive;
645+
_PyCallFunctionCache *cache1 = &cache[-1].call_function;
646+
if (!PyCallable_Check(callable)) {
647+
goto fail;
648+
}
649+
if (!PyDict_CheckExact(builtins)) {
650+
goto fail;
651+
}
652+
PyDictObject *builtins_dict = (PyDictObject *)builtins;
653+
if (builtins_dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
654+
goto fail;
655+
}
656+
/* Specialize C methods */
657+
if (PyCFunction_CheckExact(callable)) {
658+
PyCFunctionObject *meth = (PyCFunctionObject *)callable;
659+
if (meth->m_ml == NULL) {
660+
goto fail;
661+
}
662+
const char *name_ascii = meth->m_ml->ml_name;
663+
/* Specialize builtins: check method actually exists in builtins */
664+
PyObject *value = PyDict_GetItemString(builtins, name_ascii);
665+
if (value == NULL ||
666+
value != (PyObject *)meth) {
667+
goto fail;
668+
}
669+
_BuiltinCallKinds kind = -1;
670+
switch (PyCFunction_GET_FLAGS(meth) & (METH_VARARGS | METH_FASTCALL |
671+
METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) {
672+
case METH_VARARGS:
673+
case METH_VARARGS | METH_KEYWORDS:
674+
kind = PYCFUNCTION_WITH_KEYWORDS;
675+
break;
676+
case METH_FASTCALL:
677+
kind = _PYCFUNCTION_FAST;
678+
break;
679+
case METH_FASTCALL | METH_KEYWORDS:
680+
kind = _PYCFUNCTION_FAST_WITH_KEYWORDS;
681+
break;
682+
case METH_NOARGS:
683+
kind = PYCFUNCTION_NOARGS;
684+
break;
685+
case METH_O:
686+
kind = PYCFUNCTION_O;
687+
break;
688+
case METH_METHOD | METH_FASTCALL | METH_KEYWORDS:
689+
kind = PYCMETHOD;
690+
break;
691+
default:
692+
SPECIALIZATION_FAIL(CALL_FUNCTION, type, callable, "bad call flags");
693+
goto fail;
694+
}
695+
assert(kind > 0);
696+
PyCFunction cfunc = PyCFunction_GET_FUNCTION(meth);
697+
assert(cfunc != NULL);
698+
*instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN, _Py_OPARG(*instr));
699+
cache0->index = (uint16_t)kind;
700+
cache1->cfunc = cfunc;
701+
goto success;
702+
}
703+
fail:
704+
STAT_INC(CALL_FUNCTION, specialization_failure);
705+
assert(!PyErr_Occurred());
706+
cache_backoff(cache0);
707+
return 0;
708+
success:
709+
STAT_INC(CALL_FUNCTION, specialization_success);
710+
assert(!PyErr_Occurred());
711+
cache0->counter = saturating_start();
712+
return 0;
713+
}

0 commit comments

Comments
 (0)