Skip to content

Commit a2a4b9f

Browse files
authored
Attempt to speed up deepfreeze.py (#107887)
* Instead of calling get_identifiers_and_strings(), extract identifiers and strings from pycore_global_strings.h.
* Avoid ast.literal_eval(), it's very slow.
1 parent 3974534 commit a2a4b9f

File tree

2 files changed

+24
-13
lines changed

2 files changed

+24
-13
lines changed

Makefile.pre.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1253,7 +1253,7 @@ regen-frozen: Tools/build/freeze_modules.py $(FROZEN_FILES_IN)
12531253
.PHONY: regen-deepfreeze
12541254
regen-deepfreeze: $(DEEPFREEZE_OBJS)
12551255

1256-
DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT)
1256+
DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py Include/internal/pycore_global_strings.h $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT)
12571257

12581258
# BEGIN: deepfreeze modules
12591259
Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)

Tools/build/deepfreeze.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
by Python 3.10, and 3.11 features are not available.
77
"""
88
import argparse
9-
import ast
109
import builtins
1110
import collections
1211
import contextlib
@@ -17,10 +16,10 @@
1716
from typing import Dict, FrozenSet, TextIO, Tuple
1817

1918
import umarshal
20-
from generate_global_objects import get_identifiers_and_strings
19+
20+
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
2121

2222
verbose = False
23-
identifiers, strings = get_identifiers_and_strings()
2423

2524
# This must be kept in sync with opcode.py
2625
RESUME = 151
@@ -114,13 +113,27 @@ def __init__(self, file: TextIO) -> None:
114113
self.hits, self.misses = 0, 0
115114
self.finis: list[str] = []
116115
self.inits: list[str] = []
116+
self.identifiers, self.strings = self.get_identifiers_and_strings()
117117
self.write('#include "Python.h"')
118118
self.write('#include "internal/pycore_gc.h"')
119119
self.write('#include "internal/pycore_code.h"')
120120
self.write('#include "internal/pycore_frame.h"')
121121
self.write('#include "internal/pycore_long.h"')
122122
self.write("")
123123

124+
def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]:
125+
filename = os.path.join(ROOT, "Include", "internal", "pycore_global_strings.h")
126+
with open(filename) as fp:
127+
lines = fp.readlines()
128+
identifiers: set[str] = set()
129+
strings: dict[str, str] = {}
130+
for line in lines:
131+
if m := re.search(r"STRUCT_FOR_ID\((\w+)\)", line):
132+
identifiers.add(m.group(1))
133+
if m := re.search(r'STRUCT_FOR_STR\((\w+), "(.*?)"\)', line):
134+
strings[m.group(2)] = m.group(1)
135+
return identifiers, strings
136+
124137
@contextlib.contextmanager
125138
def indent(self) -> None:
126139
save_level = self.level
@@ -171,9 +184,9 @@ def generate_bytes(self, name: str, b: bytes) -> str:
171184
return f"& {name}.ob_base.ob_base"
172185

173186
def generate_unicode(self, name: str, s: str) -> str:
174-
if s in strings:
175-
return f"&_Py_STR({strings[s]})"
176-
if s in identifiers:
187+
if s in self.strings:
188+
return f"&_Py_STR({self.strings[s]})"
189+
if s in self.identifiers:
177190
return f"&_Py_ID({s})"
178191
if len(s) == 1:
179192
c = ord(s)
@@ -441,12 +454,10 @@ def is_frozen_header(source: str) -> bool:
441454

442455

443456
def decode_frozen_data(source: str) -> types.CodeType:
444-
lines = source.splitlines()
445-
while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
446-
del lines[0]
447-
while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
448-
del lines[-1]
449-
values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
457+
values: list[int] = []
458+
for line in source.splitlines():
459+
if re.match(FROZEN_DATA_LINE, line):
460+
values.extend([int(x) for x in line.split(",") if x.strip()])
450461
data = bytes(values)
451462
return umarshal.loads(data)
452463

0 commit comments

Comments
 (0)