Skip to content

Commit 7661806

Browse files
authored
[lib2to3] Make grammar pickling faster (#6491)
* Now uses pickle protocol 4
* Doesn't wrap the grammar's `__dict__` in ordered dictionaries anymore, as dictionaries in Python 3.6+ are ordered by default

This still produces deterministic pickles (that hash the same with MD5). Tested with different PYTHONHASHSEED values.
1 parent 2bea947 commit 7661806

File tree

2 files changed

+3
-25
lines changed

2 files changed

+3
-25
lines changed

Lib/lib2to3/pgen2/grammar.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -86,21 +86,9 @@ def __init__(self):
8686
self.start = 256
8787

8888
def dump(self, filename):
89-
"""Dump the grammar tables to a pickle file.
90-
91-
dump() recursively changes all dict to OrderedDict, so the pickled file
92-
is not exactly the same as what was passed in to dump(). load() uses the
93-
pickled file to create the tables, but only changes OrderedDict to dict
94-
at the top level; it does not recursively change OrderedDict to dict.
95-
So, the loaded tables are different from the original tables that were
96-
passed to load() in that some of the OrderedDict (from the pickled file)
97-
are not changed back to dict. For parsing, this has no effect on
98-
performance because OrderedDict uses dict's __getitem__ with nothing in
99-
between.
100-
"""
89+
"""Dump the grammar tables to a pickle file."""
10190
with open(filename, "wb") as f:
102-
d = _make_deterministic(self.__dict__)
103-
pickle.dump(d, f, 2)
91+
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
10492

10593
def load(self, filename):
10694
"""Load the grammar tables from a pickle file."""
@@ -141,17 +129,6 @@ def report(self):
141129
print("start", self.start)
142130

143131

144-
def _make_deterministic(top):
145-
if isinstance(top, dict):
146-
return collections.OrderedDict(
147-
sorted(((k, _make_deterministic(v)) for k, v in top.items())))
148-
if isinstance(top, list):
149-
return [_make_deterministic(e) for e in top]
150-
if isinstance(top, tuple):
151-
return tuple(_make_deterministic(e) for e in top)
152-
return top
153-
154-
155132
# Map from operator to number (since tokenize doesn't do this)
156133

157134
opmap_raw = """
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
lib2to3 now uses pickle protocol 4 for pre-computed grammars.

0 commit comments

Comments
 (0)