-
-
Notifications
You must be signed in to change notification settings - Fork 32.3k
bpo-28638: speed up namedtuple creation by avoiding exec #2736
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5634af4
c135a36
77be15f
649bb2e
da03fdb
805e0cd
1f13a26
af0c6bf
c28d4e4
d73ae9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -303,6 +303,7 @@ def __eq__(self, other): | |
### namedtuple | ||
################################################################################ | ||
|
||
# Used only for the _source attribute, not for creating namedtuple classes. | ||
_class_template = """\ | ||
from builtins import property as _property, tuple as _tuple | ||
from operator import itemgetter as _itemgetter | ||
|
@@ -348,12 +349,32 @@ def __getnewargs__(self): | |
|
||
{field_defs} | ||
""" | ||
|
||
_repr_template = '{name}=%r' | ||
|
||
_field_template = '''\ | ||
{name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}') | ||
''' | ||
_repr_template = '{name}=%r' | ||
_new_template = ''' | ||
def __new__(_cls, {arg_list}): | ||
return _tuple_new(_cls, ({arg_list})) | ||
''' | ||
|
||
|
||
class _source_descriptor: | ||
"""Descriptor for generating the _source attribute of a namedtuple.""" | ||
__slots__ = () | ||
|
||
def __get__(self, instance, owner): | ||
class_definition = _class_template.format( | ||
typename = owner.__name__, | ||
field_names = owner._fields, | ||
num_fields = owner._num_fields, | ||
arg_list = repr(owner._fields).replace("'", "")[1:-1], | ||
repr_fmt = owner._repr_fmt, | ||
field_defs = '\n'.join(_field_template.format(index=index, name=name) | ||
for index, name in enumerate(owner._fields)) | ||
) | ||
return class_definition | ||
|
||
|
||
def namedtuple(typename, field_names, *, verbose=False, rename=False, module=None): | ||
"""Returns a new subclass of tuple with named fields. | ||
|
@@ -392,44 +413,92 @@ def namedtuple(typename, field_names, *, verbose=False, rename=False, module=Non | |
or _iskeyword(name) | ||
or name.startswith('_') | ||
or name in seen): | ||
field_names[index] = '_%d' % index | ||
field_names[index] = f'_{index:d}' | ||
seen.add(name) | ||
for name in [typename] + field_names: | ||
if type(name) is not str: | ||
raise TypeError('Type names and field names must be strings') | ||
if not name.isidentifier(): | ||
raise ValueError('Type names and field names must be valid ' | ||
'identifiers: %r' % name) | ||
f'identifiers: {name!r}') | ||
if _iskeyword(name): | ||
raise ValueError('Type names and field names cannot be a ' | ||
'keyword: %r' % name) | ||
f'keyword: {name!r}') | ||
seen = set() | ||
for name in field_names: | ||
if name.startswith('_') and not rename: | ||
raise ValueError('Field names cannot start with an underscore: ' | ||
'%r' % name) | ||
f'{name!r}') | ||
if name in seen: | ||
raise ValueError('Encountered duplicate field name: %r' % name) | ||
raise ValueError(f'Encountered duplicate field name: {name!r}') | ||
seen.add(name) | ||
|
||
# Fill-in the class template | ||
class_definition = _class_template.format( | ||
typename = typename, | ||
field_names = tuple(field_names), | ||
num_fields = len(field_names), | ||
arg_list = repr(tuple(field_names)).replace("'", "")[1:-1], | ||
repr_fmt = ', '.join(_repr_template.format(name=name) | ||
for name in field_names), | ||
field_defs = '\n'.join(_field_template.format(index=index, name=name) | ||
for index, name in enumerate(field_names)) | ||
) | ||
|
||
# Execute the template string in a temporary namespace and support | ||
# tracing utilities by setting a value for frame.f_globals['__name__'] | ||
namespace = dict(__name__='namedtuple_%s' % typename) | ||
exec(class_definition, namespace) | ||
result = namespace[typename] | ||
result._source = class_definition | ||
arg_list = repr(tuple(field_names)).replace("'", "")[1:-1] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only difference is in the case of 1-element tuple. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the trailing comma desirable in the 1-element tuple case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Otherwise it's no longer a tuple. |
||
num_fields = len(field_names) | ||
repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')' | ||
|
||
namespace = {'_tuple_new': tuple.__new__} | ||
new_source = _new_template.format(arg_list=arg_list) | ||
exec(new_source, namespace) | ||
__new__ = namespace['__new__'] | ||
__new__.__doc__ = f'Create new instance of {typename}({arg_list})' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Otherwise you get a strange trailing comma in this docstring There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This pull request is about performance. I don't think changing much is better.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Although the current pull request doesn't produce exactly the same code any more anyway, since |
||
|
||
@classmethod | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be cleaner to move this down to the |
||
def _make(cls, iterable, new=tuple.__new__, len=len, num_fields=num_fields): | ||
result = new(cls, iterable) | ||
if len(result) != cls._num_fields: | ||
raise TypeError(f'Expected {num_fields} arguments, got {len(result)}') | ||
return result | ||
|
||
_make.__func__.__doc__ = (f'Make a new {typename} object from a sequence ' | ||
'or iterable') | ||
|
||
def _replace(_self, **kwds): | ||
result = _self._make(map(kwds.pop, _self._fields, _self)) | ||
if kwds: | ||
raise ValueError(f'Got unexpected field names: {list(kwds)}') | ||
return result | ||
|
||
_replace.__doc__ = (f'Return a new {typename} object replacing specified ' | ||
'fields with new values') | ||
|
||
def __repr__(self): | ||
'Return a nicely formatted representation string' | ||
return self.__class__.__name__ + self._repr_fmt % self | ||
|
||
def _asdict(self): | ||
'Return a new OrderedDict which maps field names to their values.' | ||
return OrderedDict(zip(self._fields, self)) | ||
|
||
def __getnewargs__(self): | ||
'Return self as a plain tuple. Used by copy and pickle.' | ||
return tuple(self) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These three functions without formatted docstrings don't need to be redeclared for each namedtuple, do they? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that doesn't work now because I'm setting |
||
|
||
module_name = f'namedtuple_{typename}' | ||
for method in (__new__, _make.__func__, _replace, __repr__, _asdict, __getnewargs__): | ||
method.__module__ = module_name | ||
method.__qualname__ = f'{typename}.{method.__name__}' | ||
|
||
class_namespace = { | ||
'__doc__': f'{typename}({arg_list})', | ||
'__slots__': (), | ||
'_fields': tuple(field_names), | ||
'__new__': __new__, | ||
'_make': _make, | ||
'_replace': _replace, | ||
'__repr__': __repr__, | ||
'_asdict': _asdict, | ||
'__getnewargs__': __getnewargs__, | ||
'_num_fields': len(field_names), | ||
'_repr_fmt': repr_fmt, | ||
'_source': _source_descriptor(), | ||
} | ||
for index, name in enumerate(field_names): | ||
doc = f'Alias for field number {index:d}' | ||
class_namespace[name] = property(_itemgetter(index), doc=doc) | ||
|
||
result = type(typename, (tuple,), class_namespace) | ||
|
||
if verbose: | ||
print(result._source) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Speed up namedtuple class creation by 4x by avoiding usage of exec(). Patch | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be read as exec was eliminated, so perhaps '...by reducing usage of exec().'? |
||
by Jelle Zijlstra. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't that be better handled by inserting the trailing comma here?