Skip to content

Commit d4efd91

Browse files
jdemeyermethane
authored andcommitted
bpo-36904: Optimize _PyStack_UnpackDict (GH-14517)
1 parent 5bbbc73 commit d4efd91

File tree

2 files changed

+85
-81
lines changed

2 files changed

+85
-81
lines changed

Include/cpython/abstract.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,24 +26,6 @@ PyAPI_FUNC(PyObject *) _PyStack_AsDict(
2626
PyObject *const *values,
2727
PyObject *kwnames);
2828

29-
/* Convert (args, nargs, kwargs: dict) into a (stack, nargs, kwnames: tuple).
30-
31-
Return 0 on success, raise an exception and return -1 on error.
32-
33-
Write the new stack into *p_stack. If *p_stack is differen than args, it
34-
must be released by PyMem_Free().
35-
36-
The stack uses borrowed references.
37-
38-
The type of keyword keys is not checked, these checks should be done
39-
later (ex: _PyArg_ParseStackAndKeywords). */
40-
PyAPI_FUNC(int) _PyStack_UnpackDict(
41-
PyObject *const *args,
42-
Py_ssize_t nargs,
43-
PyObject *kwargs,
44-
PyObject *const **p_stack,
45-
PyObject **p_kwnames);
46-
4729
/* Suggested size (number of positional arguments) for arrays of PyObject*
4830
allocated on a C stack to avoid allocating memory on the heap memory. Such
4931
array is used to pass positional arguments to call functions of the

Objects/call.c

Lines changed: 85 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
static PyObject *
99
cfunction_call_varargs(PyObject *func, PyObject *args, PyObject *kwargs);
1010

11+
static PyObject *const *
12+
_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
13+
PyObject **p_kwnames);
14+
15+
static void
16+
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
17+
PyObject *kwnames);
18+
1119

1220
static PyObject *
1321
null_error(void)
@@ -100,24 +108,19 @@ _PyObject_FastCallDict(PyObject *callable, PyObject *const *args,
100108
}
101109

102110
PyObject *res;
103-
if (kwargs == NULL) {
111+
if (kwargs == NULL || PyDict_GET_SIZE(kwargs) == 0) {
104112
res = func(callable, args, nargsf, NULL);
105113
}
106114
else {
107115
PyObject *kwnames;
108116
PyObject *const *newargs;
109-
if (_PyStack_UnpackDict(args, nargs, kwargs, &newargs, &kwnames) < 0) {
117+
newargs = _PyStack_UnpackDict(args, nargs, kwargs, &kwnames);
118+
if (newargs == NULL) {
110119
return NULL;
111120
}
112-
res = func(callable, newargs, nargs, kwnames);
113-
if (kwnames != NULL) {
114-
Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames) + nargs;
115-
for (i = 0; i < n; i++) {
116-
Py_DECREF(newargs[i]);
117-
}
118-
PyMem_Free((PyObject **)newargs);
119-
Py_DECREF(kwnames);
120-
}
121+
res = func(callable, newargs,
122+
nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
123+
_PyStack_UnpackDict_Free(newargs, nargs, kwnames);
121124
}
122125
return _Py_CheckFunctionResult(callable, res, NULL);
123126
}
@@ -196,24 +199,23 @@ PyVectorcall_Call(PyObject *callable, PyObject *tuple, PyObject *kwargs)
196199
return NULL;
197200
}
198201

202+
Py_ssize_t nargs = PyTuple_GET_SIZE(tuple);
203+
204+
/* Fast path for no keywords */
205+
if (kwargs == NULL || PyDict_GET_SIZE(kwargs) == 0) {
206+
return func(callable, _PyTuple_ITEMS(tuple), nargs, NULL);
207+
}
208+
199209
/* Convert arguments & call */
200210
PyObject *const *args;
201-
Py_ssize_t nargs = PyTuple_GET_SIZE(tuple);
202211
PyObject *kwnames;
203-
if (_PyStack_UnpackDict(_PyTuple_ITEMS(tuple), nargs,
204-
kwargs, &args, &kwnames) < 0) {
212+
args = _PyStack_UnpackDict(_PyTuple_ITEMS(tuple), nargs, kwargs, &kwnames);
213+
if (args == NULL) {
205214
return NULL;
206215
}
207-
PyObject *result = func(callable, args, nargs, kwnames);
208-
if (kwnames != NULL) {
209-
Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames) + nargs;
210-
for (i = 0; i < n; i++) {
211-
Py_DECREF(args[i]);
212-
}
213-
PyMem_Free((PyObject **)args);
214-
Py_DECREF(kwnames);
215-
}
216-
216+
PyObject *result = func(callable, args,
217+
nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
218+
_PyStack_UnpackDict_Free(args, nargs, kwnames);
217219
return result;
218220
}
219221

@@ -455,23 +457,22 @@ _PyMethodDef_RawFastCallDict(PyMethodDef *method, PyObject *self,
455457

456458
case METH_FASTCALL | METH_KEYWORDS:
457459
{
458-
PyObject *const *stack;
459-
PyObject *kwnames;
460460
_PyCFunctionFastWithKeywords fastmeth = (_PyCFunctionFastWithKeywords)(void(*)(void))meth;
461461

462-
if (_PyStack_UnpackDict(args, nargs, kwargs, &stack, &kwnames) < 0) {
463-
goto exit;
462+
/* Fast path for no keywords */
463+
if (kwargs == NULL || PyDict_GET_SIZE(kwargs) == 0) {
464+
result = (*fastmeth) (self, args, nargs, NULL);
465+
break;
464466
}
465467

466-
result = (*fastmeth) (self, stack, nargs, kwnames);
467-
if (kwnames != NULL) {
468-
Py_ssize_t i, n = nargs + PyTuple_GET_SIZE(kwnames);
469-
for (i = 0; i < n; i++) {
470-
Py_DECREF(stack[i]);
471-
}
472-
PyMem_Free((PyObject **)stack);
473-
Py_DECREF(kwnames);
468+
PyObject *const *stack;
469+
PyObject *kwnames;
470+
stack = _PyStack_UnpackDict(args, nargs, kwargs, &kwnames);
471+
if (stack == NULL) {
472+
goto exit;
474473
}
474+
result = (*fastmeth) (self, stack, nargs, kwnames);
475+
_PyStack_UnpackDict_Free(stack, nargs, kwnames);
475476
break;
476477
}
477478

@@ -1206,53 +1207,63 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
12061207
}
12071208

12081209

1209-
int
1210-
_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
1211-
PyObject *const **p_stack, PyObject **p_kwnames)
1212-
{
1213-
PyObject **stack, **kwstack;
1214-
Py_ssize_t nkwargs;
1215-
Py_ssize_t pos, i;
1216-
PyObject *key, *value;
1217-
PyObject *kwnames;
1210+
/* Convert (args, nargs, kwargs: dict) into a (stack, nargs, kwnames: tuple).
12181211
1219-
assert(nargs >= 0);
1220-
assert(kwargs == NULL || PyDict_CheckExact(kwargs));
1212+
Allocate a new argument vector and keyword names tuple. Return the argument
1213+
vector; return NULL with exception set on error. Return the keyword names
1214+
tuple in *p_kwnames.
12211215
1222-
if (kwargs == NULL || (nkwargs = PyDict_GET_SIZE(kwargs)) == 0) {
1223-
*p_stack = args;
1224-
*p_kwnames = NULL;
1225-
return 0;
1226-
}
1216+
The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
1217+
1218+
When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames)
12271219
1228-
if ((size_t)nargs > PY_SSIZE_T_MAX / sizeof(stack[0]) - (size_t)nkwargs) {
1220+
The type of keyword keys is not checked, these checks should be done
1221+
later (ex: _PyArg_ParseStackAndKeywords). */
1222+
static PyObject *const *
1223+
_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
1224+
PyObject **p_kwnames)
1225+
{
1226+
assert(nargs >= 0);
1227+
assert(kwargs != NULL);
1228+
assert(PyDict_Check(kwargs));
1229+
1230+
Py_ssize_t nkwargs = PyDict_GET_SIZE(kwargs);
1231+
/* Check for overflow in the PyMem_Malloc() call below. The subtraction
1232+
* in this check cannot overflow: both maxnargs and nkwargs are
1233+
* non-negative signed integers, so their difference fits in the type. */
1234+
Py_ssize_t maxnargs = PY_SSIZE_T_MAX / sizeof(args[0]) - 1;
1235+
if (nargs > maxnargs - nkwargs) {
12291236
PyErr_NoMemory();
1230-
return -1;
1237+
return NULL;
12311238
}
12321239

1233-
stack = PyMem_Malloc((nargs + nkwargs) * sizeof(stack[0]));
1240+
/* Add 1 to support PY_VECTORCALL_ARGUMENTS_OFFSET */
1241+
PyObject **stack = PyMem_Malloc((1 + nargs + nkwargs) * sizeof(args[0]));
12341242
if (stack == NULL) {
12351243
PyErr_NoMemory();
1236-
return -1;
1244+
return NULL;
12371245
}
12381246

1239-
kwnames = PyTuple_New(nkwargs);
1247+
PyObject *kwnames = PyTuple_New(nkwargs);
12401248
if (kwnames == NULL) {
12411249
PyMem_Free(stack);
1242-
return -1;
1250+
return NULL;
12431251
}
12441252

1253+
stack++; /* For PY_VECTORCALL_ARGUMENTS_OFFSET */
1254+
12451255
/* Copy positional arguments */
1246-
for (i = 0; i < nargs; i++) {
1256+
for (Py_ssize_t i = 0; i < nargs; i++) {
12471257
Py_INCREF(args[i]);
12481258
stack[i] = args[i];
12491259
}
12501260

1251-
kwstack = stack + nargs;
1252-
pos = i = 0;
1261+
PyObject **kwstack = stack + nargs;
12531262
/* This loop doesn't support lookup function mutating the dictionary
12541263
to change its size. It's a deliberate choice for speed, this function is
12551264
called in the performance critical hot code. */
1265+
Py_ssize_t pos = 0, i = 0;
1266+
PyObject *key, *value;
12561267
while (PyDict_Next(kwargs, &pos, &key, &value)) {
12571268
Py_INCREF(key);
12581269
Py_INCREF(value);
@@ -1261,7 +1272,18 @@ _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
12611272
i++;
12621273
}
12631274

1264-
*p_stack = stack;
12651275
*p_kwnames = kwnames;
1266-
return 0;
1276+
return stack;
1277+
}
1278+
1279+
static void
1280+
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
1281+
PyObject *kwnames)
1282+
{
1283+
Py_ssize_t n = PyTuple_GET_SIZE(kwnames) + nargs;
1284+
for (Py_ssize_t i = 0; i < n; i++) {
1285+
Py_DECREF(stack[i]);
1286+
}
1287+
PyMem_Free((PyObject **)stack - 1);
1288+
Py_DECREF(kwnames);
12671289
}

0 commit comments

Comments
 (0)