Skip to content

Commit a3d48cd

Browse files
authored
[mypyc] Support separate compilation (#7636)
This adds support for separate compilation to mypyc. A separate argument is added to mypycify. If True, mypyc places every module in its own separate shared library. Otherwise it can take a list of groups of source files that should be placed in a shared library together. The shared libraries communicate with each other using the C API's intended mechanism for communicating between C extension modules: Capsules. Each library creates a table of pointers to all of its exported data and stores a pointer to it in a capsule stored as a module attribute. When a library is loaded, it loads the linking table capsules from all its dependencies and copies them into a local copy of the table (to eliminate the need for a pointer indirection when accessing it). This adds a test mode that will run all multi-module run tests in separate compilation mode. I also manually tested mypy itself compiled in separate compilation mode. This supports a limited form of incremental compilation already: only modules that changed, or that depend on a header that changed, will be recompiled by the C compiler. The entire project still needs to go through the entire mypy/mypyc front-end and middle-end, however. I expect to have a PR that adds support for hooking into incremental mode next week. This is progress on mypyc/mypyc#682.
1 parent e006b94 commit a3d48cd

18 files changed

+652
-189
lines changed

mypyc/build.py

Lines changed: 155 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
import os.path
2424
import hashlib
2525
import time
26+
import re
2627

27-
from typing import Any, Dict, List, Optional, Tuple, cast
28+
from typing import List, Tuple, Any, Optional, Dict, Union, Set, cast
2829
MYPY = False
2930
if MYPY:
3031
from typing import NoReturn
@@ -36,6 +37,7 @@
3637
from mypyc.namegen import exported_name
3738
from mypyc.options import CompilerOptions
3839
from mypyc.errors import Errors
40+
from mypyc.common import shared_lib_name
3941

4042
from mypyc import emitmodule
4143

@@ -112,7 +114,8 @@ def get_mypy_config(paths: List[str],
112114
PyMODINIT_FUNC
113115
PyInit_{modname}(void)
114116
{{
115-
void *init_func = PyCapsule_Import("{libname}.{full_modname}", 0);
117+
if (!PyImport_ImportModule("{libname}")) return NULL;
118+
void *init_func = PyCapsule_Import("{libname}.init_{full_modname}", 0);
116119
if (!init_func) {{
117120
return NULL;
118121
}}
@@ -126,48 +129,55 @@ def get_mypy_config(paths: List[str],
126129

127130

128131
def generate_c_extension_shim(
        full_module_name: str, module_name: str, dir_name: str, group_name: str) -> str:
    """Write a C extension shim whose PyInit function is a passthrough.

    The shim source is produced by filling in shim_template and is
    written to a .c file inside dir_name.

    Arguments:
        full_module_name: the dotted full module name
        module_name: the final component of the module name
        dir_name: the directory to place source code
        group_name: the name of the group; it is converted to the
            library name with shared_lib_name()

    Returns the path of the C file that was written.
    """
    # The file name is derived from the exported (mangled) module name.
    shim_filename = '%s.c' % exported_name(full_module_name)
    cpath = os.path.join(dir_name, shim_filename)

    shim_source = shim_template.format(
        modname=module_name,
        libname=shared_lib_name(group_name),
        full_modname=exported_name(full_module_name),
    )
    write_file(cpath, shim_source)

    return cpath
148151

149152

150-
def shared_lib_name(modules: List[str]) -> str:
151-
"""Produce a probably unique name for a library from a list of module names."""
153+
def group_name(modules: List[str]) -> str:
    """Derive a (probably unique) group name from a list of module names.

    A group holding exactly one module is named after that module's
    exported name. Any other group is named with the first 20 hex digits
    of the SHA-1 of the comma-joined module names.
    """
    if len(modules) != 1:
        joined = ','.join(modules).encode()
        digest = hashlib.sha1(joined).hexdigest()
        return digest[:20]

    return exported_name(modules[0])
155161

156162

157163
def include_dir() -> str:
    """Return the absolute path of the lib-rt dir that needs to be included.

    The directory is located next to this source file.
    """
    containing_dir = os.path.dirname(__file__)
    return os.path.join(os.path.abspath(containing_dir), 'lib-rt')
160166

161167

162-
def generate_c(sources: List[BuildSource], options: Options,
163-
shared_lib_name: Optional[str],
168+
def generate_c(sources: List[BuildSource],
169+
options: Options,
170+
groups: emitmodule.Groups,
164171
compiler_options: Optional[CompilerOptions] = None
165-
) -> Tuple[List[Tuple[str, str]], str]:
172+
) -> Tuple[List[List[Tuple[str, str]]], str]:
166173
"""Drive the actual core compilation step.
167174
175+
The groups argument describes how modules are assigned to C
176+
extension modules. See the comments on the Groups type in
177+
mypyc.emitmodule for details.
178+
168179
Returns the C source code and (for debugging) the pretty printed IR.
169180
"""
170-
module_names = [source.module for source in sources]
171181
compiler_options = compiler_options or CompilerOptions()
172182

173183
# Do the actual work now
@@ -183,12 +193,17 @@ def generate_c(sources: List[BuildSource], options: Options,
183193
if compiler_options.verbose:
184194
print("Parsed and typechecked in {:.3f}s".format(t1 - t0))
185195

196+
all_module_names = []
197+
for group_sources, _ in groups:
198+
all_module_names.extend([source.module for source in group_sources])
199+
186200
errors = Errors()
187201

188202
ops = [] # type: List[str]
189-
ctext = emitmodule.compile_modules_to_c(result, module_names, shared_lib_name,
203+
ctext = emitmodule.compile_modules_to_c(result,
190204
compiler_options=compiler_options,
191-
errors=errors, ops=ops)
205+
errors=errors, ops=ops,
206+
groups=groups)
192207
if errors.num_errors:
193208
errors.flush_errors()
194209
sys.exit(1)
@@ -201,8 +216,9 @@ def generate_c(sources: List[BuildSource], options: Options,
201216

202217

203218
def build_using_shared_lib(sources: List[BuildSource],
204-
lib_name: str,
219+
group_name: str,
205220
cfiles: List[str],
221+
deps: List[str],
206222
build_dir: str,
207223
extra_compile_args: List[str],
208224
) -> List[Extension]:
@@ -218,15 +234,16 @@ def build_using_shared_lib(sources: List[BuildSource],
218234
Capsules stored in module attributes.
219235
"""
220236
extensions = [Extension(
221-
lib_name,
237+
shared_lib_name(group_name),
222238
sources=cfiles,
223239
include_dirs=[include_dir()],
240+
depends=deps,
224241
extra_compile_args=extra_compile_args,
225242
)]
226243

227244
for source in sources:
228245
module_name = source.module.split('.')[-1]
229-
shim_file = generate_c_extension_shim(source.module, module_name, build_dir, lib_name)
246+
shim_file = generate_c_extension_shim(source.module, module_name, build_dir, group_name)
230247

231248
# We include the __init__ in the "module name" we stick in the Extension,
232249
# since this seems to be needed for it to end up in the right place.
@@ -276,23 +293,103 @@ def write_file(path: str, contents: str) -> None:
276293
f.write(contents)
277294

278295

279-
def mypycify(paths: List[str],
280-
mypy_options: Optional[List[str]] = None,
281-
opt_level: str = '3',
282-
multi_file: bool = False,
283-
skip_cgen: bool = False,
284-
verbose: bool = False,
285-
strip_asserts: bool = False) -> List[Extension]:
296+
def construct_groups(
    sources: List[BuildSource],
    separate: Union[bool, List[Tuple[List[str], Optional[str]]]],
    use_shared_lib: bool,
) -> emitmodule.Groups:
    """Expand the user's `separate` setting into a list of groups.

    Arguments:
        sources: every source being built.
        separate: the user-specified grouping configuration:
            * True: each source becomes its own group;
            * a list of (file paths, optional name) pairs: each pair
              becomes a group made of the sources whose path is listed,
              and every source not picked up by any pair gets a group
              of its own;
            * anything else: all sources share a single group.
        use_shared_lib: when true, groups that have no name are given
            one computed by group_name() from their module names.

    Returns a list of (sources in the group, group name or None) pairs.
    """
    if separate is True:
        groups = [([src], None) for src in sources]  # type: emitmodule.Groups
    elif isinstance(separate, list):
        groups = []
        claimed = set()
        for paths, explicit_name in separate:
            members = [src for src in sources if src.path in paths]
            groups.append((members, explicit_name))
            claimed.update(members)
        # Sources not mentioned in any explicit group are compiled alone.
        groups.extend([([src], None) for src in sources if src not in claimed])
    else:
        groups = [(sources, None)]

    # Generate missing names (only needed when building shared libraries)
    if use_shared_lib:
        for idx, (members, name) in enumerate(groups):
            if not name:
                generated = group_name([member.module for member in members])
                groups[idx] = (members, generated)

    return groups
335+
336+
337+
def get_header_deps(cfiles: List[Tuple[str, str]]) -> List[str]:
    """Find all the headers used by a group of cfiles.

    We do this by just regexping the source, which is a bit simpler than
    properly plumbing the data through.

    Only quoted includes (#include "...") are collected; angle-bracket
    includes such as <stdio.h> are not matched.

    Arguments:
        cfiles: A list of (file name, file contents) pairs.

    Returns the sorted list of distinct header names.
    """
    headers = set()  # type: Set[str]
    for _, contents in cfiles:
        # Stop at the first closing quote on the line. A greedy `.*` would
        # run to the *last* quote on the line and fold any later quoted text
        # (for example inside a trailing comment) into the header name.
        headers.update(re.findall(r'#include "([^"\n]*)"', contents))

    return sorted(headers)
351+
352+
353+
def mypycify(
354+
paths: List[str],
355+
mypy_options: Optional[List[str]] = None,
356+
*,
357+
verbose: bool = False,
358+
opt_level: str = '3',
359+
strip_asserts: bool = False,
360+
multi_file: bool = False,
361+
separate: Union[bool, List[Tuple[List[str], Optional[str]]]] = False,
362+
skip_cgen_input: Optional[Any] = None
363+
) -> List[Extension]:
286364
"""Main entry point to building using mypyc.
287365
288366
This produces a list of Extension objects that should be passed as the
289367
ext_modules parameter to setup.
290368
291369
Arguments:
292-
* paths: A list of file paths to build. It may contain globs.
293-
* mypy_options: Optionally, a list of command line flags to pass to mypy.
370+
paths: A list of file paths to build. It may contain globs.
371+
mypy_options: Optionally, a list of command line flags to pass to mypy.
294372
(This can also contain additional files, for compatibility reasons.)
295-
* opt_level: The optimization level, as a string. Defaults to '3' (meaning '-O3').
373+
verbose: Should mypyc be more verbose. Defaults to false.
374+
375+
opt_level: The optimization level, as a string. Defaults to '3' (meaning '-O3').
376+
strip_asserts: Should asserts be stripped from the generated code.
377+
378+
multi_file: Should each Python module be compiled into its own C source file.
379+
This can reduce compile time and memory requirements at the likely
380+
cost of runtime performance of compiled code. Defaults to false.
381+
separate: Should compiled modules be placed in separate extension modules.
382+
If False, all modules are placed in a single shared library.
383+
If True, every module is placed in its own library.
384+
Otherwise separate should be a list of
385+
(file name list, optional shared library name) pairs specifying
386+
groups of files that should be placed in the same shared library
387+
(while all other modules will be placed in its own library).
388+
389+
Each group can be compiled independently, which can
390+
speed up compilation, but calls between groups can
391+
be slower than calls within a group and can't be
392+
inlined.
296393
"""
297394

298395
setup_mypycify_vars()
@@ -321,25 +418,31 @@ def mypycify(paths: List[str],
321418
# around with making the single module code handle packages.)
322419
use_shared_lib = len(sources) > 1 or any('.' in x.module for x in sources)
323420

324-
lib_name = shared_lib_name([source.module for source in sources]) if use_shared_lib else None
421+
groups = construct_groups(sources, separate, use_shared_lib)
325422

326-
# We let the test harness make us skip doing the full compilation
423+
# We let the test harness just pass in the c file contents instead
327424
# so that it can do a corner-cutting version without full stubs.
328-
# TODO: Be able to do this based on file mtimes?
329-
if not skip_cgen:
330-
cfiles, ops_text = generate_c(sources, options, lib_name,
331-
compiler_options=compiler_options)
425+
if not skip_cgen_input:
426+
group_cfiles, ops_text = generate_c(sources, options, groups,
427+
compiler_options=compiler_options)
332428
# TODO: unique names?
333429
with open(os.path.join(build_dir, 'ops.txt'), 'w') as f:
334430
f.write(ops_text)
431+
else:
432+
group_cfiles = skip_cgen_input
433+
434+
# Write out the generated C and collect the files for each group
435+
group_cfilenames = [] # type: List[Tuple[List[str], List[str]]]
436+
for cfiles in group_cfiles:
335437
cfilenames = []
336438
for cfile, ctext in cfiles:
337439
cfile = os.path.join(build_dir, cfile)
338440
write_file(cfile, ctext)
339441
if os.path.splitext(cfile)[1] == '.c':
340442
cfilenames.append(cfile)
341-
else:
342-
cfilenames = glob.glob(os.path.join(build_dir, '*.c'))
443+
444+
deps = [os.path.join(build_dir, dep) for dep in get_header_deps(cfiles)]
445+
group_cfilenames.append((cfilenames, deps))
343446

344447
cflags = [] # type: List[str]
345448
if compiler.compiler_type == 'unix':
@@ -370,17 +473,22 @@ def mypycify(paths: List[str],
370473
]
371474

372475
# Copy the runtime library in
476+
shared_cfilenames = []
373477
for name in ['CPy.c', 'getargs.c']:
374478
rt_file = os.path.join(build_dir, name)
375479
with open(os.path.join(include_dir(), name), encoding='utf-8') as f:
376480
write_file(rt_file, f.read())
377-
cfilenames.append(rt_file)
378-
379-
if use_shared_lib:
380-
assert lib_name
381-
extensions = build_using_shared_lib(sources, lib_name, cfilenames, build_dir, cflags)
382-
else:
383-
extensions = build_single_module(sources, cfilenames, cflags)
481+
shared_cfilenames.append(rt_file)
482+
483+
extensions = []
484+
for (group_sources, lib_name), (cfilenames, deps) in zip(groups, group_cfilenames):
485+
if use_shared_lib:
486+
assert lib_name
487+
extensions.extend(build_using_shared_lib(
488+
group_sources, lib_name, cfilenames + shared_cfilenames, deps, build_dir, cflags))
489+
else:
490+
extensions.extend(build_single_module(
491+
group_sources, cfilenames + shared_cfilenames, cflags))
384492

385493
return extensions
386494

mypyc/common.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,11 @@
3131

3232
def decorator_helper_name(func_name: str) -> str:
    """Build the decorator helper name that corresponds to func_name.

    The result is func_name wrapped as '__mypyc_<func_name>_decorator_helper__'.
    """
    template = '__mypyc_{}_decorator_helper__'
    return template.format(func_name)
34+
35+
36+
def shared_lib_name(group_name: str) -> str:
    """Return the extension module name used for the given group.

    The module name is simply the group name with 'mypyc_' put in front.
    """
    prefix = 'mypyc_'
    return prefix + '{}'.format(group_name)

0 commit comments

Comments
 (0)