Skip to content

Commit bbb192d

Browse files
authored
More improvements to stubgen (#7951)
This includes many improvements to stubgen and a related mypy fix. Here are the most useful ones:

* Use a separate process to do introspection of modules so that we can recover if a module kills the current process on import, for example.
* Export all names imported from the current package by default. Add `--export-less` stubgen flag to disable this behavior.
* Avoid a crash in semantic analysis if there's a bad property definition (stubgen can generate these).
* Fix various issues with bad Python code being generated by stubgen.
* Ignore bad signatures in docstrings (this is still very ad-hoc, but it's a bit more robust now).
* Try to find a module using `sys.path` if we can't import it.
* Skip some additional modules that may be runnable since they can cause trouble when we try to introspect them.

This is again a big PR, but the commit history should be reasonably clean.
1 parent 384f32c commit bbb192d

File tree

9 files changed

+1023
-224
lines changed

9 files changed

+1023
-224
lines changed

mypy/moduleinspect.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
"""Basic introspection of modules."""
2+
3+
from typing import List, Optional, Union
4+
from types import ModuleType
5+
from multiprocessing import Process, Queue
6+
import importlib
7+
import inspect
8+
import os
9+
import pkgutil
10+
import queue
11+
import sys
12+
13+
14+
class ModuleProperties:
    """Record of the facts collected about one module by runtime introspection.

    This is a passive value holder: the constructor stores each argument
    unchanged on the attribute of the same name.
    """

    def __init__(self,
                 name: str,
                 file: Optional[str],
                 path: Optional[List[str]],
                 all: Optional[List[str]],
                 is_c_module: bool,
                 subpackages: List[str]) -> None:
        # Results computed by the introspection code.
        self.subpackages = subpackages
        self.is_c_module = is_c_module
        # Mirrors of the module's own dunder attributes.
        self.all = all    # __all__ attribute
        self.path = path  # __path__ attribute
        self.file = file  # __file__ attribute
        self.name = name  # __name__ attribute
28+
29+
30+
def is_c_module(module: ModuleType) -> bool:
    """Return whether a module has to be examined through introspection.

    This is true when the module has no ``__file__`` (or it is None), and
    when its file name ends in ``.so`` or ``.pyd``.
    """
    filename = module.__dict__.get('__file__')
    if filename is None:
        # Possibly a namespace package: there is no source file to read, so
        # introspection is the only option.
        return True
    _, extension = os.path.splitext(filename)
    return extension in ['.so', '.pyd']
36+
37+
38+
class InspectError(Exception):
    """Raised when a module could not be imported for introspection."""
40+
41+
42+
def get_package_properties(package_id: str) -> ModuleProperties:
    """Use runtime introspection to get information about a module/package.

    Import ``package_id`` and collect its ``__name__``, ``__file__``,
    ``__path__`` and ``__all__`` attributes, whether it is treated as a C
    module, and the qualified names of its submodules/subpackages.

    Raise InspectError if the import raises anything at all.
    """
    try:
        package = importlib.import_module(package_id)
    except BaseException as e:
        # BaseException also covers SystemExit and KeyboardInterrupt raised
        # while the module body runs. Keep the original exception as the cause.
        raise InspectError(str(e)) from e
    # Fall back to the requested id so that the name is always a string, even
    # for objects that don't define __name__.
    name = getattr(package, '__name__', package_id)
    file = getattr(package, '__file__', None)
    path = getattr(package, '__path__', None)  # type: Optional[List[str]]
    if not isinstance(path, list):
        path = None
    pkg_all = getattr(package, '__all__', None)
    if pkg_all is not None:
        try:
            pkg_all = list(pkg_all)
        except Exception:
            # __all__ is not iterable; treat it as missing.
            pkg_all = None
    is_c = is_c_module(package)
    prefix = name + "."

    if path is None:
        # Object has no path; this means it's either a module inside a package
        # (and thus no sub-packages), or it could be a C extension package.
        if is_c:
            # This is a C extension module, now get the list of all sub-packages
            # using the inspect module. Only keep members that are modules whose
            # own name places them directly below this package.
            subpackages = [prefix + member_name
                           for member_name, val in inspect.getmembers(package)
                           if inspect.ismodule(val)
                           and val.__name__ == prefix + member_name]
        else:
            # It's a module inside a package.  There's nothing else to walk/yield.
            subpackages = []
    else:
        # Errors while walking sub-packages are ignored via the no-op onerror.
        all_packages = pkgutil.walk_packages(path, prefix=prefix,
                                             onerror=lambda r: None)
        subpackages = [qualified_name for importer, qualified_name, ispkg in all_packages]
    return ModuleProperties(name=name,
                            file=file,
                            path=path,
                            all=pkg_all,
                            is_c_module=is_c,
                            subpackages=subpackages)
84+
85+
86+
def worker(tasks: 'Queue[str]',
           results: 'Queue[Union[str, ModuleProperties]]',
           sys_path: List[str]) -> None:
    """The main loop of a worker introspection process.

    Replace ``sys.path`` with ``sys_path``, then loop forever: take a module
    id from ``tasks``, introspect it, and put exactly one reply on
    ``results``. The reply is the ModuleProperties on success, or the error
    message string if introspection raised InspectError.
    """
    sys.path = sys_path
    while True:
        module_id = tasks.get()
        try:
            reply = get_package_properties(module_id)  # type: Union[str, ModuleProperties]
        except InspectError as err:
            reply = str(err)
        results.put(reply)
99+
100+
101+
class ModuleInspect:
    """Perform runtime introspection of modules in a separate process.

    Reuse the process for multiple modules for efficiency. However, if there is an
    error, retry using a fresh process to avoid cross-contamination of state between
    modules.

    We use a separate process to isolate us from many side effects. For example, the
    import of a module may kill the current process, and we want to recover from that.

    Always use in a with statement for proper clean-up:

      with ModuleInspect() as m:
          p = m.get_package_properties('urllib.parse')
    """

    def __init__(self) -> None:
        self._start()

    def _start(self) -> None:
        """Create fresh task/result queues and launch a new worker process."""
        self.tasks = Queue()  # type: Queue[str]
        self.results = Queue()  # type: Queue[Union[ModuleProperties, str]]
        self.proc = Process(target=worker, args=(self.tasks, self.results, sys.path))
        self.proc.start()
        self.counter = 0  # Number of successful roundtrips

    def close(self) -> None:
        """Free any resources used."""
        self.proc.terminate()
        # Reap the terminated child so that it doesn't linger as a zombie. The
        # wait is bounded in case the child does not exit promptly.
        self.proc.join(timeout=1.0)

    def get_package_properties(self, package_id: str) -> ModuleProperties:
        """Return some properties of a module/package using runtime introspection.

        Raise InspectError if the target couldn't be imported.
        Raise RuntimeError if the worker process doesn't answer in time.
        """
        self.tasks.put(package_id)
        try:
            res = self._get_from_queue()
        except RuntimeError:
            # The worker is still busy with this request. Replace it so that a
            # late reply can't be mistaken for the answer to a later request.
            self.close()
            self._start()
            raise
        if res is None:
            # The process died; recover and report error.
            self._start()
            raise InspectError('Process died when importing %r' % package_id)
        if isinstance(res, str):
            # Error importing module
            if self.counter > 0:
                # Also try with a fresh process. Maybe one of the previous imports has
                # corrupted some global state. The fresh process starts with
                # counter == 0, so this recursion happens at most once.
                self.close()
                self._start()
                return self.get_package_properties(package_id)
            raise InspectError(res)
        self.counter += 1
        return res

    def _get_from_queue(self) -> Union[ModuleProperties, str, None]:
        """Get value from the queue.

        Return the value read from the queue, or None if the process unexpectedly died.
        Raise RuntimeError after max_iter unsuccessful polls of 0.05 seconds each.
        """
        max_iter = 100
        n = 0
        while True:
            if n == max_iter:
                raise RuntimeError('Timeout waiting for subprocess')
            try:
                return self.results.get(timeout=0.05)
            except queue.Empty:
                # Nothing yet: give up early if the worker is gone, since no
                # reply will ever arrive.
                if not self.proc.is_alive():
                    return None
            n += 1

    def __enter__(self) -> 'ModuleInspect':
        return self

    def __exit__(self, *args: object) -> None:
        self.close()

mypy/semanal.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -865,19 +865,26 @@ def analyze_property_with_multi_part_definition(self, defn: OverloadedFuncDef) -
865865
defn.is_property = True
866866
items = defn.items
867867
first_item = cast(Decorator, defn.items[0])
868-
for item in items[1:]:
869-
if isinstance(item, Decorator) and len(item.decorators) == 1:
870-
node = item.decorators[0]
871-
if isinstance(node, MemberExpr):
872-
if node.name == 'setter':
873-
# The first item represents the entire property.
874-
first_item.var.is_settable_property = True
875-
# Get abstractness from the original definition.
876-
item.func.is_abstract = first_item.func.is_abstract
877-
else:
878-
self.fail("Decorated property not supported", item)
868+
deleted_items = []
869+
for i, item in enumerate(items[1:]):
879870
if isinstance(item, Decorator):
871+
if len(item.decorators) == 1:
872+
node = item.decorators[0]
873+
if isinstance(node, MemberExpr):
874+
if node.name == 'setter':
875+
# The first item represents the entire property.
876+
first_item.var.is_settable_property = True
877+
# Get abstractness from the original definition.
878+
item.func.is_abstract = first_item.func.is_abstract
879+
else:
880+
self.fail("Decorated property not supported", item)
880881
item.func.accept(self)
882+
else:
883+
self.fail('Unexpected definition for property "{}"'.format(first_item.func.name),
884+
item)
885+
deleted_items.append(i + 1)
886+
for i in reversed(deleted_items):
887+
del items[i]
881888

882889
def add_function_to_symbol_table(self, func: Union[FuncDef, OverloadedFuncDef]) -> None:
883890
if self.is_class_scope():

mypy/stubdoc.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
"""
66
import re
77
import io
8-
import sys
98
import contextlib
109
import tokenize
1110

@@ -18,14 +17,25 @@
1817
Sig = Tuple[str, str]
1918

2019

20+
_TYPE_RE = re.compile(r'^[a-zA-Z_][\w\[\], ]*(\.[a-zA-Z_][\w\[\], ]*)*$') # type: Final
21+
_ARG_NAME_RE = re.compile(r'\**[A-Za-z_][A-Za-z0-9_]*$') # type: Final
22+
23+
24+
def is_valid_type(s: str) -> bool:
25+
"""Try to determine whether a string might be a valid type annotation."""
26+
if s in ('True', 'False', 'retval'):
27+
return False
28+
if ',' in s and '[' not in s:
29+
return False
30+
return _TYPE_RE.match(s) is not None
31+
32+
2133
class ArgSig:
2234
"""Signature info for a single argument."""
2335

24-
_TYPE_RE = re.compile(r'^[a-zA-Z_][\w\[\], ]*(\.[a-zA-Z_][\w\[\], ]*)*$') # type: Final
25-
2636
def __init__(self, name: str, type: Optional[str] = None, default: bool = False):
2737
self.name = name
28-
if type and not self._TYPE_RE.match(type):
38+
if type and not is_valid_type(type):
2939
raise ValueError("Invalid type: " + type)
3040
self.type = type
3141
# Does this argument have a default value?
@@ -60,7 +70,8 @@ def __eq__(self, other: Any) -> bool:
6070

6171

6272
class DocStringParser:
63-
"""Parse function signstures in documentation."""
73+
"""Parse function signatures in documentation."""
74+
6475
def __init__(self, function_name: str) -> None:
6576
# Only search for signatures of function with this name.
6677
self.function_name = function_name
@@ -76,7 +87,7 @@ def __init__(self, function_name: str) -> None:
7687
self.signatures = [] # type: List[FunctionSig]
7788

7889
def add_token(self, token: tokenize.TokenInfo) -> None:
79-
"""Process next token fro the token stream."""
90+
"""Process next token from the token stream."""
8091
if (token.type == tokenize.NAME and token.string == self.function_name and
8192
self.state[-1] == STATE_INIT):
8293
self.state.append(STATE_FUNCTION_NAME)
@@ -129,6 +140,10 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
129140
self.state.pop()
130141
elif self.state[-1] == STATE_ARGUMENT_LIST:
131142
self.arg_name = self.accumulator
143+
if not _ARG_NAME_RE.match(self.arg_name):
144+
# Invalid argument name.
145+
self.reset()
146+
return
132147

133148
if token.string == ')':
134149
self.state.pop()
@@ -152,6 +167,9 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
152167
elif (token.type in (tokenize.NEWLINE, tokenize.ENDMARKER) and
153168
self.state[-1] in (STATE_INIT, STATE_RETURN_VALUE)):
154169
if self.state[-1] == STATE_RETURN_VALUE:
170+
if not is_valid_type(self.accumulator):
171+
self.reset()
172+
return
155173
self.ret_type = self.accumulator
156174
self.accumulator = ""
157175
self.state.pop()
@@ -166,6 +184,12 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
166184
else:
167185
self.accumulator += token.string
168186

187+
def reset(self) -> None:
188+
self.state = [STATE_INIT]
189+
self.args = []
190+
self.found = False
191+
self.accumulator = ""
192+
169193
def get_signatures(self) -> List[FunctionSig]:
170194
"""Return sorted copy of the list of signatures found so far."""
171195
def has_arg(name: str, signature: FunctionSig) -> bool:
@@ -211,13 +235,7 @@ def is_unique_args(sig: FunctionSig) -> bool:
211235
"""return true if function argument names are unique"""
212236
return len(sig.args) == len(set((arg.name for arg in sig.args)))
213237

214-
# Warn about invalid signatures
215-
invalid_sigs = [sig for sig in sigs if not is_unique_args(sig)]
216-
if invalid_sigs:
217-
print("Warning: Invalid signatures found:", file=sys.stderr)
218-
print("\n".join(str(sig) for sig in invalid_sigs), file=sys.stderr)
219-
220-
# return only signatures, that have unique argument names. mypy fails on non-uqniue arg names
238+
# Return only signatures that have unique argument names. Mypy fails on non-unique arg names.
221239
return [sig for sig in sigs if is_unique_args(sig)]
222240

223241

0 commit comments

Comments
 (0)