|
| 1 | +"""Basic introspection of modules.""" |
| 2 | + |
| 3 | +from typing import List, Optional, Union |
| 4 | +from types import ModuleType |
| 5 | +from multiprocessing import Process, Queue |
| 6 | +import importlib |
| 7 | +import inspect |
| 8 | +import os |
| 9 | +import pkgutil |
| 10 | +import queue |
| 11 | +import sys |
| 12 | + |
| 13 | + |
class ModuleProperties:
    """Plain record of what introspection found out about one module/package.

    Attributes:
        name: The module's __name__ attribute.
        file: The module's __file__ attribute, if any.
        path: The module's __path__ attribute, if any.
        all: The module's __all__ attribute, if any.
        is_c_module: Whether the module is considered a C module.
        subpackages: Names of the sub-packages found for the module.
    """

    def __init__(
        self,
        name: str,
        file: Optional[str],
        path: Optional[List[str]],
        all: Optional[List[str]],
        is_c_module: bool,
        subpackages: List[str],
    ) -> None:
        # Values are stored exactly as given; no copying or validation is done.
        self.name = name
        self.file = file
        self.path = path
        self.all = all
        self.is_c_module = is_c_module
        self.subpackages = subpackages
| 28 | + |
| 29 | + |
def is_c_module(module: ModuleType) -> bool:
    """Tell whether `module` has to be examined through introspection.

    Returns True when the module's __dict__ has no `__file__` (or it is None),
    or when the extension of `__file__` is '.so' or '.pyd'; False otherwise.
    """
    module_file = module.__dict__.get('__file__')
    if module_file is None:
        # No source file to look at -- this could be a namespace package, which
        # can only be handled through introspection.
        return True
    extension = os.path.splitext(module_file)[-1]
    return extension in ('.so', '.pyd')
| 36 | + |
| 37 | + |
class InspectError(Exception):
    """Error raised when a module or package could not be introspected."""
| 40 | + |
| 41 | + |
def get_package_properties(package_id: str) -> ModuleProperties:
    """Use runtime introspection to get information about a module/package.

    The target is imported into the current process, so whatever its import does
    happens here.

    Args:
        package_id: Name of the module or package, as accepted by
            importlib.import_module().

    Returns:
        A ModuleProperties holding the module's __name__, __file__, __path__ and
        __all__ (each None when missing or unusable, except the name, which falls
        back to `package_id`), whether it counts as a C module, and the names of
        its sub-packages.

    Raises:
        InspectError: If the import fails for any reason. The message is the
            text of the original exception, which is attached as __cause__.
    """
    try:
        package = importlib.import_module(package_id)
    except BaseException as e:
        # Deliberately broad: the import runs arbitrary code and every kind of
        # failure is reported to the caller the same way.
        raise InspectError(str(e)) from e
    # Fall back to the requested id so that the name is always a string.
    name = getattr(package, '__name__', package_id)
    file = getattr(package, '__file__', None)
    path = getattr(package, '__path__', None)  # type: Optional[List[str]]
    if not isinstance(path, list):
        # A missing __path__, or one that is not a plain list, counts as "no path".
        path = None
    pkg_all = getattr(package, '__all__', None)
    if pkg_all is not None:
        try:
            pkg_all = list(pkg_all)
        except Exception:
            # An __all__ that cannot be iterated is treated as absent.
            pkg_all = None
    is_c = is_c_module(package)

    if path is None:
        # Object has no path; this means it's either a module inside a package
        # (and thus no sub-packages), or it could be a C extension package.
        if is_c:
            # This is a C extension module, now get the list of all sub-packages
            # using the inspect module. Only members that are modules named
            # "<this module>.<attribute>" are counted.
            prefix = name + "."
            subpackages = [prefix + attr
                           for attr, val in inspect.getmembers(package)
                           if inspect.ismodule(val) and val.__name__ == prefix + attr]
        else:
            # It's a module inside a package. There's nothing else to walk/yield.
            subpackages = []
    else:
        # The no-op onerror keeps the walk going when a sub-package fails to import.
        all_packages = pkgutil.walk_packages(path, prefix=name + ".",
                                             onerror=lambda r: None)
        subpackages = [qualified_name for _, qualified_name, _ in all_packages]
    return ModuleProperties(name=name,
                            file=file,
                            path=path,
                            all=pkg_all,
                            is_c_module=is_c,
                            subpackages=subpackages)
| 84 | + |
| 85 | + |
def worker(tasks: 'Queue[str]',
           results: 'Queue[Union[str, ModuleProperties]]',
           sys_path: List[str]) -> None:
    """The main loop of a worker introspection process.

    Replaces this process's sys.path with `sys_path`, then loops without end:
    each module id taken from `tasks` is answered with exactly one item put on
    `results` -- the ModuleProperties on success, or the error text (a str) when
    get_package_properties() raises InspectError.
    """
    sys.path = sys_path
    while True:
        module_id = tasks.get()
        try:
            outcome = get_package_properties(module_id)  # type: Union[str, ModuleProperties]
        except InspectError as err:
            # Errors travel back as plain strings.
            outcome = str(err)
        results.put(outcome)
| 99 | + |
| 100 | + |
class ModuleInspect:
    """Perform runtime introspection of modules in a separate process.

    Reuse the process for multiple modules for efficiency. However, if there is an
    error, retry using a fresh process to avoid cross-contamination of state between
    modules.

    We use a separate process to isolate us from many side effects. For example, the
    import of a module may kill the current process, and we want to recover from that.

    Always use in a with statement for proper clean-up:

        with ModuleInspect() as m:
            p = m.get_package_properties('urllib.parse')
    """

    # Seconds to block on the result queue before checking on the worker again.
    _POLL_INTERVAL = 0.05
    # Number of polls after which a request counts as timed out (about 5 s in total).
    _MAX_POLLS = 100
    # Upper bound, in seconds, on waiting for a terminated worker to exit.
    _JOIN_TIMEOUT = 1.0

    def __init__(self) -> None:
        self._start()

    def _start(self) -> None:
        """Create fresh queues and launch a new worker process using them."""
        self.tasks = Queue()  # type: Queue[str]
        self.results = Queue()  # type: Queue[Union[ModuleProperties, str]]
        self.proc = Process(target=worker, args=(self.tasks, self.results, sys.path))
        self.proc.start()
        self.counter = 0  # Number of successful roundtrips

    def _restart(self) -> None:
        """Dispose of the current worker and replace it (and its queues)."""
        self.close()
        self._start()

    def close(self) -> None:
        """Free any resources used."""
        self.proc.terminate()
        # Reap the child so that it does not linger as a zombie. The wait is
        # bounded in case the child does not exit promptly.
        self.proc.join(timeout=self._JOIN_TIMEOUT)

    def get_package_properties(self, package_id: str) -> ModuleProperties:
        """Return some properties of a module/package using runtime introspection.

        Raise InspectError if the target couldn't be imported, or if the worker
        process died while handling it. Raise RuntimeError if the worker did not
        answer in time. In the latter two cases a fresh worker has been started
        by the time the exception propagates.
        """
        self.tasks.put(package_id)
        try:
            res = self._get_from_queue()
        except RuntimeError:
            # The worker is still busy with this request. If we kept it, its late
            # answer would be read as the reply to the next request, so discard
            # the worker together with its queues.
            self._restart()
            raise
        if res is None:
            # The process died; recover and report error.
            self._restart()
            raise InspectError('Process died when importing %r' % package_id)
        if isinstance(res, str):
            # Error importing module
            if self.counter > 0:
                # Also try with a fresh process. Maybe one of the previous imports has
                # corrupted some global state. The restart resets the counter, so
                # this recursion happens at most once.
                self._restart()
                return self.get_package_properties(package_id)
            raise InspectError(res)
        self.counter += 1
        return res

    def _get_from_queue(self) -> Union[ModuleProperties, str, None]:
        """Get value from the queue.

        Return the value read from the queue, or None if the process unexpectedly died.
        Raise RuntimeError if nothing arrived after _MAX_POLLS polls of
        _POLL_INTERVAL seconds each while the process stayed alive.
        """
        for _ in range(self._MAX_POLLS):
            try:
                return self.results.get(timeout=self._POLL_INTERVAL)
            except queue.Empty:
                if not self.proc.is_alive():
                    return None
        raise RuntimeError('Timeout waiting for subprocess')

    def __enter__(self) -> 'ModuleInspect':
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
0 commit comments