Skip to content

More improvements to stubgen #7951

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
Nov 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
037ce58
Continue operation even if imported module raises SystemExit
JukkaL Nov 5, 2019
86e9ad4
Use separate process for module introspection
JukkaL Nov 5, 2019
9923a7a
Recover from helper process death
JukkaL Nov 5, 2019
0722c7d
Display message when we start processing ASTs
JukkaL Nov 5, 2019
db0a8d5
Automatically skip 'setup' modules and other non-library modules
JukkaL Nov 5, 2019
ca1923a
Add additional vendored package naming convention
JukkaL Nov 6, 2019
7e10f4c
Fallback to finding module using sys.path if import fails
JukkaL Nov 6, 2019
8e1b5b8
Import all names imported from the current package by default
JukkaL Nov 6, 2019
0728e70
Silently ignore invalid docstring signatures
JukkaL Nov 7, 2019
67e2db4
Also use Python 2 sys.path to find files
JukkaL Nov 7, 2019
2cb948c
Do not generate invalid argument names from docstrings
JukkaL Nov 7, 2019
514ae1d
Be more aggressive about ignoring various dunders
JukkaL Nov 7, 2019
b818592
Avoid generating both an attribute and a property definition
JukkaL Nov 7, 2019
6237ec4
Avoid None return type for some methods that should always return a v…
JukkaL Nov 8, 2019
b4c8f21
Fix indent of named tuples
JukkaL Nov 8, 2019
c0dda3b
Fix some imports from vendorex six.moves
JukkaL Nov 8, 2019
0d6e545
Fall back to Any for a dynamic named tuple definition
JukkaL Nov 8, 2019
97f2bf4
Don't crash on invalid property definition
JukkaL Nov 8, 2019
2affb99
Clear decorators if we skip a function
JukkaL Nov 8, 2019
29acbed
Fix @classmethod without corresponding 'def' in C module
JukkaL Nov 8, 2019
75a094b
Catch more bad types and bad arguments names in C stubs
JukkaL Nov 8, 2019
fb95598
Fix stripping of C type names
JukkaL Nov 8, 2019
17440b4
Misc tweaks
JukkaL Nov 14, 2019
7968a6a
Fixes
JukkaL Nov 14, 2019
73fb604
Fix lint
JukkaL Nov 15, 2019
2cc05d5
'None' is a valid type
JukkaL Nov 15, 2019
7e3826c
Update test case
JukkaL Nov 15, 2019
e8a1498
Fix Python 3.5
JukkaL Nov 15, 2019
9634aa9
Another Python 3.5 fix
JukkaL Nov 15, 2019
18591d3
Raise exception if subprocess gets stuck
JukkaL Nov 15, 2019
99c6dad
Attempt to fix compiled
JukkaL Nov 15, 2019
78771f4
Attempt to fix compiled
JukkaL Nov 15, 2019
1a098c6
Attempt to fix compiled
JukkaL Nov 15, 2019
d35ec7b
Fix unused import
JukkaL Nov 15, 2019
d892a7a
Fix test case on Windows
JukkaL Nov 15, 2019
4724d00
Pass sys.path to the worker function
JukkaL Nov 15, 2019
1bc5af7
Respond to feedback
JukkaL Nov 15, 2019
fd088b1
Skip test case on Windows
JukkaL Nov 15, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions mypy/moduleinspect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
"""Basic introspection of modules."""

from typing import List, Optional, Union
from types import ModuleType
from multiprocessing import Process, Queue
import importlib
import inspect
import os
import pkgutil
import queue
import sys


class ModuleProperties:
    """Plain record of facts collected about a module through introspection.

    Attributes:
        name: value of the module's __name__ attribute
        file: value of the module's __file__ attribute (None if absent)
        path: value of the module's __path__ attribute (None if absent)
        all: value of the module's __all__ attribute (None if absent)
        is_c_module: whether the module was classified as a C module
        subpackages: names of the sub-packages/sub-modules that were found
    """

    def __init__(self,
                 name: str,
                 file: Optional[str],
                 path: Optional[List[str]],
                 all: Optional[List[str]],
                 is_c_module: bool,
                 subpackages: List[str]) -> None:
        # Values are stored verbatim; no validation or copying is performed.
        self.name = name
        self.file = file
        self.path = path
        self.all = all
        self.is_c_module = is_c_module
        self.subpackages = subpackages


def is_c_module(module: ModuleType) -> bool:
    """Return True if the module must be handled via runtime introspection.

    That is the case when the module has no __file__ (or it is None), or when
    the file name ends in a compiled-extension suffix ('.so' or '.pyd').
    """
    filename = module.__dict__.get('__file__')
    if filename is None:
        # Could be a namespace package. There is no source file to analyze,
        # so such modules have to go through introspection as well.
        return True
    _, extension = os.path.splitext(filename)
    return extension in ('.so', '.pyd')


class InspectError(Exception):
    """Raised when runtime introspection of a module fails."""


def get_package_properties(package_id: str) -> ModuleProperties:
    """Use runtime introspection to get information about a module/package.

    Import the target and collect its __name__, __file__, __path__ and
    __all__ attributes, whether it looks like a C module, and the names of
    its sub-packages.

    Raise InspectError if importing the target fails for any reason.
    """
    try:
        package = importlib.import_module(package_id)
    except BaseException as e:
        # Deliberately broad: importing arbitrary code can raise anything,
        # including SystemExit. Chain the cause so the original traceback
        # remains available to whoever handles the InspectError.
        raise InspectError(str(e)) from e
    # Fall back to the requested id so that 'name' is always a str, as
    # declared by ModuleProperties.
    name = getattr(package, '__name__', package_id)
    file = getattr(package, '__file__', None)
    path = getattr(package, '__path__', None)  # type: Optional[List[str]]
    if not isinstance(path, list):
        # Only a plain list is accepted as a package search path.
        path = None
    pkg_all = getattr(package, '__all__', None)
    if pkg_all is not None:
        try:
            pkg_all = list(pkg_all)
        except Exception:
            # __all__ is not iterable; treat it as missing.
            pkg_all = None
    is_c = is_c_module(package)
    prefix = name + "."

    if path is None:
        # Object has no path; this means it's either a module inside a package
        # (and thus no sub-packages), or it could be a C extension package.
        if is_c:
            # This is a C extension module, now get the list of all sub-packages
            # using the inspect module. Only members that are modules whose
            # own __name__ matches the qualified attribute name are kept.
            subpackages = [prefix + attr
                           for attr, val in inspect.getmembers(package)
                           if inspect.ismodule(val)
                           and val.__name__ == prefix + attr]
        else:
            # It's a module inside a package.  There's nothing else to walk/yield.
            subpackages = []
    else:
        # Errors reported while walking are ignored via the no-op callback.
        all_packages = pkgutil.walk_packages(path, prefix=prefix,
                                             onerror=lambda r: None)
        subpackages = [qualified_name for _importer, qualified_name, _ispkg in all_packages]
    return ModuleProperties(name=name,
                            file=file,
                            path=path,
                            all=pkg_all,
                            is_c_module=is_c,
                            subpackages=subpackages)


def worker(tasks: 'Queue[str]',
           results: 'Queue[Union[str, ModuleProperties]]',
           sys_path: List[str]) -> None:
    """The main loop of a worker introspection process.

    Repeatedly take a module name from 'tasks', introspect it, and put either
    the resulting ModuleProperties or, on InspectError, the error message
    string on 'results'. The loop never returns on its own.
    """
    # Use the module search path handed over by the caller.
    sys.path = sys_path
    while True:
        module_id = tasks.get()
        try:
            outcome = get_package_properties(module_id)  # type: Union[str, ModuleProperties]
        except InspectError as err:
            # Failures are reported as plain strings.
            outcome = str(err)
        results.put(outcome)


class ModuleInspect:
    """Perform runtime introspection of modules in a separate process.

    Reuse the process for multiple modules for efficiency. However, if there is an
    error, retry using a fresh process to avoid cross-contamination of state between
    modules.

    We use a separate process to isolate us from many side effects. For example, the
    import of a module may kill the current process, and we want to recover from that.

    Always use in a with statement for proper clean-up:

        with ModuleInspect() as m:
            p = m.get_package_properties('urllib.parse')
    """

    def __init__(self) -> None:
        self._start()

    def __enter__(self) -> 'ModuleInspect':
        return self

    def __exit__(self, *args: object) -> None:
        self.close()

    def _start(self) -> None:
        """Create fresh task/result queues and launch a new worker process."""
        self.tasks = Queue()  # type: Queue[str]
        self.results = Queue()  # type: Queue[Union[ModuleProperties, str]]
        self.proc = Process(target=worker, args=(self.tasks, self.results, sys.path))
        self.proc.start()
        self.counter = 0  # Number of successful roundtrips

    def close(self) -> None:
        """Free any resources used."""
        self.proc.terminate()

    def get_package_properties(self, package_id: str) -> ModuleProperties:
        """Return some properties of a module/package using runtime introspection.

        Raise InspectError if the target couldn't be imported.
        """
        self.tasks.put(package_id)
        outcome = self._get_from_queue()
        if outcome is None:
            # The process died; recover and report error.
            self._start()
            raise InspectError('Process died when importing %r' % package_id)
        if not isinstance(outcome, str):
            # Successful roundtrip.
            self.counter += 1
            return outcome
        # A string result is an error message from the worker.
        if self.counter <= 0:
            # The worker was fresh, so there is nothing to gain from a retry.
            raise InspectError(outcome)
        # Also try with a fresh process. Maybe one of the previous imports has
        # corrupted some global state.
        self.close()
        self._start()
        return self.get_package_properties(package_id)

    def _get_from_queue(self) -> Union[ModuleProperties, str, None]:
        """Get value from the queue.

        Return the value read from the queue, or None if the process unexpectedly died.
        Raise RuntimeError if nothing arrives within the allowed number of polls
        while the process is still alive.
        """
        max_iter = 100
        for _ in range(max_iter):
            try:
                return self.results.get(timeout=0.05)
            except queue.Empty:
                if not self.proc.is_alive():
                    return None
        raise RuntimeError('Timeout waiting for subprocess')
29 changes: 18 additions & 11 deletions mypy/semanal.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,19 +865,26 @@ def analyze_property_with_multi_part_definition(self, defn: OverloadedFuncDef) -
defn.is_property = True
items = defn.items
first_item = cast(Decorator, defn.items[0])
for item in items[1:]:
if isinstance(item, Decorator) and len(item.decorators) == 1:
node = item.decorators[0]
if isinstance(node, MemberExpr):
if node.name == 'setter':
# The first item represents the entire property.
first_item.var.is_settable_property = True
# Get abstractness from the original definition.
item.func.is_abstract = first_item.func.is_abstract
else:
self.fail("Decorated property not supported", item)
deleted_items = []
for i, item in enumerate(items[1:]):
if isinstance(item, Decorator):
if len(item.decorators) == 1:
node = item.decorators[0]
if isinstance(node, MemberExpr):
if node.name == 'setter':
# The first item represents the entire property.
first_item.var.is_settable_property = True
# Get abstractness from the original definition.
item.func.is_abstract = first_item.func.is_abstract
else:
self.fail("Decorated property not supported", item)
item.func.accept(self)
else:
self.fail('Unexpected definition for property "{}"'.format(first_item.func.name),
item)
deleted_items.append(i + 1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the extra item is also decorated (with some unrelated decorator)? Maybe add a test for this?

for i in reversed(deleted_items):
del items[i]

def add_function_to_symbol_table(self, func: Union[FuncDef, OverloadedFuncDef]) -> None:
if self.is_class_scope():
Expand Down
44 changes: 31 additions & 13 deletions mypy/stubdoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""
import re
import io
import sys
import contextlib
import tokenize

Expand All @@ -18,14 +17,25 @@
Sig = Tuple[str, str]


_TYPE_RE = re.compile(r'^[a-zA-Z_][\w\[\], ]*(\.[a-zA-Z_][\w\[\], ]*)*$') # type: Final
_ARG_NAME_RE = re.compile(r'\**[A-Za-z_][A-Za-z0-9_]*$') # type: Final


def is_valid_type(s: str) -> bool:
    """Try to determine whether a string might be a valid type annotation."""
    # These words are never accepted as types.
    if s in ('True', 'False', 'retval'):
        return False
    # A comma without any '[' is rejected before the pattern is consulted.
    has_bare_comma = ',' in s and '[' not in s
    return not has_bare_comma and _TYPE_RE.match(s) is not None


class ArgSig:
"""Signature info for a single argument."""

_TYPE_RE = re.compile(r'^[a-zA-Z_][\w\[\], ]*(\.[a-zA-Z_][\w\[\], ]*)*$') # type: Final

def __init__(self, name: str, type: Optional[str] = None, default: bool = False):
self.name = name
if type and not self._TYPE_RE.match(type):
if type and not is_valid_type(type):
raise ValueError("Invalid type: " + type)
self.type = type
# Does this argument have a default value?
Expand Down Expand Up @@ -60,7 +70,8 @@ def __eq__(self, other: Any) -> bool:


class DocStringParser:
"""Parse function signstures in documentation."""
"""Parse function signatures in documentation."""

def __init__(self, function_name: str) -> None:
# Only search for signatures of function with this name.
self.function_name = function_name
Expand All @@ -76,7 +87,7 @@ def __init__(self, function_name: str) -> None:
self.signatures = [] # type: List[FunctionSig]

def add_token(self, token: tokenize.TokenInfo) -> None:
"""Process next token fro the token stream."""
"""Process next token from the token stream."""
if (token.type == tokenize.NAME and token.string == self.function_name and
self.state[-1] == STATE_INIT):
self.state.append(STATE_FUNCTION_NAME)
Expand Down Expand Up @@ -129,6 +140,10 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
self.state.pop()
elif self.state[-1] == STATE_ARGUMENT_LIST:
self.arg_name = self.accumulator
if not _ARG_NAME_RE.match(self.arg_name):
# Invalid argument name.
self.reset()
return

if token.string == ')':
self.state.pop()
Expand All @@ -152,6 +167,9 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
elif (token.type in (tokenize.NEWLINE, tokenize.ENDMARKER) and
self.state[-1] in (STATE_INIT, STATE_RETURN_VALUE)):
if self.state[-1] == STATE_RETURN_VALUE:
if not is_valid_type(self.accumulator):
self.reset()
return
self.ret_type = self.accumulator
self.accumulator = ""
self.state.pop()
Expand All @@ -166,6 +184,12 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
else:
self.accumulator += token.string

def reset(self) -> None:
    """Discard the signature being parsed and return to the initial state.

    Only the in-progress parsing state is cleared; the list of signatures
    already collected is not touched here.
    """
    self.accumulator = ""
    self.found = False
    self.args = []
    self.state = [STATE_INIT]

def get_signatures(self) -> List[FunctionSig]:
"""Return sorted copy of the list of signatures found so far."""
def has_arg(name: str, signature: FunctionSig) -> bool:
Expand Down Expand Up @@ -211,13 +235,7 @@ def is_unique_args(sig: FunctionSig) -> bool:
"""return true if function argument names are unique"""
return len(sig.args) == len(set((arg.name for arg in sig.args)))

# Warn about invalid signatures
invalid_sigs = [sig for sig in sigs if not is_unique_args(sig)]
if invalid_sigs:
print("Warning: Invalid signatures found:", file=sys.stderr)
print("\n".join(str(sig) for sig in invalid_sigs), file=sys.stderr)

# return only signatures, that have unique argument names. mypy fails on non-uqniue arg names
# Return only signatures that have unique argument names. Mypy fails on non-unique arg names.
return [sig for sig in sigs if is_unique_args(sig)]


Expand Down
Loading