Skip to content

Commit 0f7b0b3

Browse files
authored
bpo-29735: Optimize partial_call(): avoid tuple (#516)
* Add _PyObject_HasFastCall().
* partial_call() now avoids a temporary tuple to pass positional arguments if the callable supports the FASTCALL calling convention for positional arguments.
* Also fix a performance regression in partial_call() if the callable doesn't support FASTCALL.
1 parent d4914e9 commit 0f7b0b3

File tree

3 files changed

+100
-35
lines changed

3 files changed

+100
-35
lines changed

Include/abstract.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ PyAPI_FUNC(int) _PyStack_UnpackDict(
209209
40 bytes on the stack. */
210210
#define _PY_FASTCALL_SMALL_STACK 5
211211

212+
/* Return 1 if callable supports FASTCALL calling convention for positional
213+
arguments: see _PyObject_FastCallDict() and _PyObject_FastCallKeywords() */
214+
PyAPI_FUNC(int) _PyObject_HasFastCall(PyObject *callable);
215+
212216
/* Call the callable object 'callable' with the "fast call" calling convention:
213217
args is a C array for positional arguments (nargs is the number of
214218
positional arguments), kwargs is a dictionary for keyword arguments.

Modules/_functoolsmodule.c

Lines changed: 80 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ typedef struct {
1818
PyObject *kw;
1919
PyObject *dict;
2020
PyObject *weakreflist; /* List of weak references */
21+
int use_fastcall;
2122
} partialobject;
2223

2324
static PyTypeObject partial_type;
@@ -110,6 +111,8 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
110111
return NULL;
111112
}
112113

114+
pto->use_fastcall = _PyObject_HasFastCall(func);
115+
113116
return (PyObject *)pto;
114117
}
115118

@@ -127,69 +130,110 @@ partial_dealloc(partialobject *pto)
127130
}
128131

129132
static PyObject *
130-
partial_call(partialobject *pto, PyObject *args, PyObject *kw)
133+
partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs,
134+
PyObject *kwargs)
131135
{
136+
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
132137
PyObject *ret;
133-
PyObject *argappl, *kwappl;
134-
PyObject **stack;
135-
Py_ssize_t nargs;
138+
PyObject **stack, **stack_buf = NULL;
139+
Py_ssize_t nargs2, pto_nargs;
136140

137-
assert (PyCallable_Check(pto->fn));
138-
assert (PyTuple_Check(pto->args));
139-
assert (PyDict_Check(pto->kw));
141+
pto_nargs = PyTuple_GET_SIZE(pto->args);
142+
nargs2 = pto_nargs + nargs;
140143

141-
if (PyTuple_GET_SIZE(pto->args) == 0) {
142-
stack = &PyTuple_GET_ITEM(args, 0);
143-
nargs = PyTuple_GET_SIZE(args);
144-
argappl = NULL;
144+
if (pto_nargs == 0) {
145+
stack = args;
145146
}
146-
else if (PyTuple_GET_SIZE(args) == 0) {
147+
else if (nargs == 0) {
147148
stack = &PyTuple_GET_ITEM(pto->args, 0);
148-
nargs = PyTuple_GET_SIZE(pto->args);
149-
argappl = NULL;
150149
}
151150
else {
152-
stack = NULL;
153-
argappl = PySequence_Concat(pto->args, args);
154-
if (argappl == NULL) {
155-
return NULL;
151+
if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
152+
stack = small_stack;
153+
}
154+
else {
155+
stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *));
156+
if (stack_buf == NULL) {
157+
PyErr_NoMemory();
158+
return NULL;
159+
}
160+
stack = stack_buf;
156161
}
157162

158-
assert(PyTuple_Check(argappl));
163+
/* use borrowed references */
164+
memcpy(stack,
165+
&PyTuple_GET_ITEM(pto->args, 0),
166+
pto_nargs * sizeof(PyObject*));
167+
memcpy(&stack[pto_nargs],
168+
args,
169+
nargs * sizeof(PyObject*));
170+
}
171+
172+
ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs);
173+
PyMem_Free(stack_buf);
174+
return ret;
175+
}
176+
177+
static PyObject *
178+
partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs)
179+
{
180+
PyObject *ret, *args2;
181+
182+
/* Note: tupleconcat() is optimized for empty tuples */
183+
args2 = PySequence_Concat(pto->args, args);
184+
if (args2 == NULL) {
185+
return NULL;
159186
}
187+
assert(PyTuple_Check(args2));
188+
189+
ret = PyObject_Call(pto->fn, args2, kwargs);
190+
Py_DECREF(args2);
191+
return ret;
192+
}
193+
194+
static PyObject *
195+
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
196+
{
197+
PyObject *kwargs2, *res;
198+
199+
assert (PyCallable_Check(pto->fn));
200+
assert (PyTuple_Check(pto->args));
201+
assert (PyDict_Check(pto->kw));
160202

161203
if (PyDict_GET_SIZE(pto->kw) == 0) {
162-
kwappl = kw;
163-
Py_XINCREF(kwappl);
204+
/* kwargs can be NULL */
205+
kwargs2 = kwargs;
206+
Py_XINCREF(kwargs2);
164207
}
165208
else {
166209
/* bpo-27840, bpo-29318: dictionary of keyword parameters must be
167210
copied, because a function using "**kwargs" can modify the
168211
dictionary. */
169-
kwappl = PyDict_Copy(pto->kw);
170-
if (kwappl == NULL) {
171-
Py_XDECREF(argappl);
212+
kwargs2 = PyDict_Copy(pto->kw);
213+
if (kwargs2 == NULL) {
172214
return NULL;
173215
}
174216

175-
if (kw != NULL) {
176-
if (PyDict_Merge(kwappl, kw, 1) != 0) {
177-
Py_XDECREF(argappl);
178-
Py_DECREF(kwappl);
217+
if (kwargs != NULL) {
218+
if (PyDict_Merge(kwargs2, kwargs, 1) != 0) {
219+
Py_DECREF(kwargs2);
179220
return NULL;
180221
}
181222
}
182223
}
183224

184-
if (stack) {
185-
ret = _PyObject_FastCallDict(pto->fn, stack, nargs, kwappl);
225+
226+
if (pto->use_fastcall) {
227+
res = partial_fastcall(pto,
228+
&PyTuple_GET_ITEM(args, 0),
229+
PyTuple_GET_SIZE(args),
230+
kwargs2);
186231
}
187232
else {
188-
ret = PyObject_Call(pto->fn, argappl, kwappl);
189-
Py_DECREF(argappl);
233+
res = partial_call_impl(pto, args, kwargs2);
190234
}
191-
Py_XDECREF(kwappl);
192-
return ret;
235+
Py_XDECREF(kwargs2);
236+
return res;
193237
}
194238

195239
static int
@@ -315,12 +359,13 @@ partial_setstate(partialobject *pto, PyObject *state)
315359
return NULL;
316360
}
317361

318-
Py_INCREF(fn);
319362
if (dict == Py_None)
320363
dict = NULL;
321364
else
322365
Py_INCREF(dict);
323366

367+
Py_INCREF(fn);
368+
pto->use_fastcall = _PyObject_HasFastCall(fn);
324369
Py_SETREF(pto->fn, fn);
325370
Py_SETREF(pto->args, fnargs);
326371
Py_SETREF(pto->kw, kw);

Objects/call.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22
#include "frameobject.h"
33

44

5+
int
6+
_PyObject_HasFastCall(PyObject *callable)
7+
{
8+
if (PyFunction_Check(callable)) {
9+
return 1;
10+
}
11+
else if (PyCFunction_Check(callable)) {
12+
return !(PyCFunction_GET_FLAGS(callable) & METH_VARARGS);
13+
}
14+
else {
15+
assert (PyCallable_Check(callable));
16+
return 0;
17+
}
18+
}
19+
20+
521
static PyObject *
622
null_error(void)
723
{

0 commit comments

Comments
 (0)