-
-
Notifications
You must be signed in to change notification settings - Fork 32.2k
bpo-31336: Speed up type creation, which is highly dominated by slow dict lookups. #3279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0a865c9
c4779e3
ff2ea63
76b1986
6f98485
d9b726e
be15255
0736226
8bc783f
66648dd
85fb3ae
09e716a
1d52082
f09bfd3
824d7cb
5184191
4efde8e
f5bce2a
02bfef0
2497858
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Speed up class creation by 10-20% by reducing the overhead in the | ||
necessary special method lookups. Patch by Stefan Behnel. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2360,35 +2360,39 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds) | |
&bases, &PyDict_Type, &orig_dict)) | ||
return NULL; | ||
|
||
/* Determine the proper metatype to deal with this: */ | ||
winner = _PyType_CalculateMetaclass(metatype, bases); | ||
if (winner == NULL) { | ||
return NULL; | ||
} | ||
|
||
if (winner != metatype) { | ||
if (winner->tp_new != type_new) /* Pass it to the winner */ | ||
return winner->tp_new(winner, args, kwds); | ||
metatype = winner; | ||
} | ||
|
||
/* Adjust for empty tuple bases */ | ||
nbases = PyTuple_GET_SIZE(bases); | ||
if (nbases == 0) { | ||
bases = PyTuple_Pack(1, &PyBaseObject_Type); | ||
base = &PyBaseObject_Type; | ||
bases = PyTuple_Pack(1, base); | ||
if (bases == NULL) | ||
goto error; | ||
return NULL; | ||
nbases = 1; | ||
} | ||
else | ||
Py_INCREF(bases); | ||
else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any effect of this change? I tested There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I consider this part a cleanup that makes it clearer what operations are needed when, and how error cases are dealt with. |
||
/* Search the bases for the proper metatype to deal with this: */ | ||
winner = _PyType_CalculateMetaclass(metatype, bases); | ||
if (winner == NULL) { | ||
return NULL; | ||
} | ||
|
||
/* Calculate best base, and check that all bases are type objects */ | ||
base = best_base(bases); | ||
if (base == NULL) { | ||
goto error; | ||
if (winner != metatype) { | ||
if (winner->tp_new != type_new) /* Pass it to the winner */ | ||
return winner->tp_new(winner, args, kwds); | ||
metatype = winner; | ||
} | ||
|
||
/* Calculate best base, and check that all bases are type objects */ | ||
base = best_base(bases); | ||
if (base == NULL) { | ||
return NULL; | ||
} | ||
|
||
Py_INCREF(bases); | ||
} | ||
|
||
/* Use "goto error" from this point on as we now own the reference to "bases". */ | ||
|
||
dict = PyDict_Copy(orig_dict); | ||
if (dict == NULL) | ||
goto error; | ||
|
@@ -2938,54 +2942,46 @@ PyType_GetSlot(PyTypeObject *type, int slot) | |
return *(void**)(((char*)type) + slotoffsets[slot]); | ||
} | ||
|
||
/* Internal API to look for a name through the MRO. | ||
This returns a borrowed reference, and doesn't set an exception! */ | ||
PyObject * | ||
_PyType_Lookup(PyTypeObject *type, PyObject *name) | ||
/* Internal API to look for a name through the MRO, bypassing the method cache. | ||
This returns a borrowed reference, and might set an exception. | ||
'error' is set to: -1: error with exception; 1: error without exception; 0: ok */ | ||
static PyObject * | ||
find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) | ||
{ | ||
Py_ssize_t i, n; | ||
PyObject *mro, *res, *base, *dict; | ||
unsigned int h; | ||
Py_hash_t hash; | ||
|
||
if (MCACHE_CACHEABLE_NAME(name) && | ||
PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) { | ||
/* fast path */ | ||
h = MCACHE_HASH_METHOD(type, name); | ||
if (method_cache[h].version == type->tp_version_tag && | ||
method_cache[h].name == name) { | ||
#if MCACHE_STATS | ||
method_cache_hits++; | ||
#endif | ||
return method_cache[h].value; | ||
if (!PyUnicode_CheckExact(name) || | ||
(hash = ((PyASCIIObject *) name)->hash) == -1) | ||
{ | ||
hash = PyObject_Hash(name); | ||
if (hash == -1) { | ||
*error = -1; | ||
return NULL; | ||
} | ||
} | ||
|
||
/* Look in tp_dict of types in MRO */ | ||
mro = type->tp_mro; | ||
|
||
if (mro == NULL) { | ||
if ((type->tp_flags & Py_TPFLAGS_READYING) == 0 && | ||
PyType_Ready(type) < 0) { | ||
/* It's not ideal to clear the error condition, | ||
but this function is documented as not setting | ||
an exception, and I don't want to change that. | ||
When PyType_Ready() can't proceed, it won't | ||
set the "ready" flag, so future attempts to ready | ||
the same type will call it again -- hopefully | ||
in a context that propagates the exception out. | ||
*/ | ||
PyErr_Clear(); | ||
return NULL; | ||
if ((type->tp_flags & Py_TPFLAGS_READYING) == 0) { | ||
if (PyType_Ready(type) < 0) { | ||
*error = -1; | ||
return NULL; | ||
} | ||
mro = type->tp_mro; | ||
} | ||
mro = type->tp_mro; | ||
if (mro == NULL) { | ||
*error = 1; | ||
return NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this is the only non-exception raising error case left, I actually wonder if this is really an error case. If there is no MRO in a ready-ied type, isn't that just fine from the point of view of a lookup? |
||
} | ||
} | ||
|
||
res = NULL; | ||
/* keep a strong reference to mro because type->tp_mro can be replaced | ||
during PyDict_GetItem(dict, name) */ | ||
/* Keep a strong reference to mro because type->tp_mro can be replaced | ||
during dict lookup, e.g. when comparing to non-string keys. */ | ||
Py_INCREF(mro); | ||
assert(PyTuple_Check(mro)); | ||
n = PyTuple_GET_SIZE(mro); | ||
|
@@ -2994,11 +2990,61 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) | |
assert(PyType_Check(base)); | ||
dict = ((PyTypeObject *)base)->tp_dict; | ||
assert(dict && PyDict_Check(dict)); | ||
res = PyDict_GetItem(dict, name); | ||
res = _PyDict_GetItem_KnownHash(dict, name, hash); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Already changed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see well the difference between PyDict_GetItem() and _PyDict_GetItem_KnownHash() in term of performance. Maybe the difference is that PyDict_GetItem() calls PyErr_Fetch/PyErr_Restore. In that case, PyDict_GetItemWithError() would have the same speed, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using The difference is:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. KnownHash is extremely short in comparison and probably gets inlined and streamlined with LTO. Substantially less branching. |
||
if (res != NULL) | ||
break; | ||
if (PyErr_Occurred()) { | ||
*error = -1; | ||
goto done; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I dislike error handling here. I suggest to call PyObject_GetItem() if PyDict_CheckExact() is false, and to really report the error to the caller (not ignore it). Would it possible? I don't think that it's correct to call _PyDict_GetItem_KnownHash() if tp_dict is a dict subtype. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also a good catch. That complicates things a bit, though. If we have a PyUnicode subtype for the name and a non-dict mapping, we could end up calling hash() once more than needed. I've moved the hash calculation down to get a lazy calculation for the exact-dict case. This leads to a small but measurable performance degradation of about 1-2% less speedup. It's still pretty good in comparison, I think. |
||
} | ||
*error = 0; | ||
done: | ||
Py_DECREF(mro); | ||
return res; | ||
} | ||
|
||
/* Internal API to look for a name through the MRO. | ||
This returns a borrowed reference, and doesn't set an exception! */ | ||
PyObject * | ||
_PyType_Lookup(PyTypeObject *type, PyObject *name) | ||
{ | ||
PyObject *res; | ||
int error; | ||
unsigned int h; | ||
|
||
if (MCACHE_CACHEABLE_NAME(name) && | ||
PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) { | ||
/* fast path */ | ||
h = MCACHE_HASH_METHOD(type, name); | ||
if (method_cache[h].version == type->tp_version_tag && | ||
method_cache[h].name == name) { | ||
#if MCACHE_STATS | ||
method_cache_hits++; | ||
#endif | ||
return method_cache[h].value; | ||
} | ||
} | ||
|
||
/* We may end up clearing live exceptions below, so make sure it's ours. */ | ||
assert(!PyErr_Occurred()); | ||
|
||
res = find_name_in_mro(type, name, &error); | ||
/* Only put NULL results into cache if there was no error. */ | ||
if (error) { | ||
/* It's not ideal to clear the error condition, | ||
but this function is documented as not setting | ||
an exception, and I don't want to change that. | ||
E.g., when PyType_Ready() can't proceed, it won't | ||
set the "ready" flag, so future attempts to ready | ||
the same type will call it again -- hopefully | ||
in a context that propagates the exception out. | ||
*/ | ||
if (error == -1) { | ||
PyErr_Clear(); | ||
} | ||
return NULL; | ||
} | ||
|
||
if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(type)) { | ||
h = MCACHE_HASH_METHOD(type, name); | ||
|
@@ -6949,6 +6995,7 @@ update_one_slot(PyTypeObject *type, slotdef *p) | |
void *generic = NULL, *specific = NULL; | ||
int use_generic = 0; | ||
int offset = p->offset; | ||
int error; | ||
void **ptr = slotptr(type, offset); | ||
|
||
if (ptr == NULL) { | ||
|
@@ -6957,9 +7004,18 @@ update_one_slot(PyTypeObject *type, slotdef *p) | |
} while (p->offset == offset); | ||
return p; | ||
} | ||
/* We may end up clearing live exceptions below, so make sure it's ours. */ | ||
assert(!PyErr_Occurred()); | ||
do { | ||
descr = _PyType_Lookup(type, p->name_strobj); | ||
/* Use faster uncached lookup as we won't get any cache hits during type setup. */ | ||
descr = find_name_in_mro(type, p->name_strobj, &error); | ||
if (descr == NULL) { | ||
if (error == -1) { | ||
/* It is unlikely by not impossible that there has been an exception | ||
during lookup. Since this function originally expected no errors, | ||
we ignore them here in order to keep up the interface. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that "Since this function originally expected no errors" matters here. Why not reporting the exception to the caller and handle it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm working on a patch that cleans up much of the mess around |
||
PyErr_Clear(); | ||
} | ||
if (ptr == (void**)&type->tp_iternext) { | ||
specific = (void *)_PyObject_NextNotImplemented; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PEP 7 nitpick: when you modify code, it's better to add { ... } to if blocks.