Skip to content

[mypyc] Various improvements to annotated html generation #18848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 28, 2025
Merged
176 changes: 148 additions & 28 deletions mypyc/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,39 @@

from mypy.build import BuildResult
from mypy.nodes import (
AssignmentStmt,
CallExpr,
ClassDef,
Decorator,
DictionaryComprehension,
Expression,
ForStmt,
FuncDef,
GeneratorExpr,
IndexExpr,
LambdaExpr,
MemberExpr,
MypyFile,
NamedTupleExpr,
NameExpr,
NewTypeExpr,
Node,
OpExpr,
RefExpr,
TupleExpr,
TypedDictExpr,
TypeInfo,
TypeVarExpr,
Var,
WithStmt,
)
from mypy.traverser import TraverserVisitor
from mypy.types import AnyType, Instance, ProperType, Type, TypeOfAny, get_proper_type
from mypy.util import FancyFormatter
from mypyc.ir.func_ir import FuncIR
from mypyc.ir.module_ir import ModuleIR
from mypyc.ir.ops import CallC, LoadLiteral, LoadStatic, Value
from mypyc.irbuild.mapper import Mapper


class Annotation:
Expand Down Expand Up @@ -71,18 +84,21 @@ def __init__(self, message: str, priority: int = 1) -> None:

stdlib_hints: Final = {
"functools.partial": Annotation(
'"functools.partial" is inefficient in compiled code.', priority=2
'"functools.partial" is inefficient in compiled code.', priority=3
),
"itertools.chain": Annotation(
'"itertools.chain" is inefficient in compiled code (hint: replace with for loops).',
priority=2,
priority=3,
),
"itertools.groupby": Annotation(
'"itertools.groupby" is inefficient in compiled code.', priority=2
'"itertools.groupby" is inefficient in compiled code.', priority=3
),
"itertools.islice": Annotation(
'"itertools.islice" is inefficient in compiled code (hint: replace with for loop over index range).',
priority=2,
priority=3,
),
"copy.deepcopy": Annotation(
'"copy.deepcopy" tends to be slow. Make a shallow copy if possible.', priority=2
),
}

Expand Down Expand Up @@ -127,14 +143,16 @@ def __init__(self, path: str, annotations: dict[int, list[Annotation]]) -> None:


def generate_annotated_html(
html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR]
html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR], mapper: Mapper
) -> None:
annotations = []
for mod, mod_ir in modules.items():
path = result.graph[mod].path
tree = result.graph[mod].tree
assert tree is not None
annotations.append(generate_annotations(path or "<source>", tree, mod_ir, result.types))
annotations.append(
generate_annotations(path or "<source>", tree, mod_ir, result.types, mapper)
)
html = generate_html_report(annotations)
with open(html_fnam, "w") as f:
f.write(html)
Expand All @@ -145,15 +163,18 @@ def generate_annotated_html(


def generate_annotations(
path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type]
path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type], mapper: Mapper
) -> AnnotatedSource:
anns = {}
for func_ir in ir.functions:
anns.update(function_annotations(func_ir, tree))
visitor = ASTAnnotateVisitor(type_map)
visitor = ASTAnnotateVisitor(type_map, mapper)
for defn in tree.defs:
defn.accept(visitor)
anns.update(visitor.anns)
for line in visitor.ignored_lines:
if line in anns:
del anns[line]
return AnnotatedSource(path, anns)


Expand All @@ -168,18 +189,28 @@ def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Anno
ann: str | Annotation | None = None
if name == "CPyObject_GetAttr":
attr_name = get_str_literal(op.args[1])
if attr_name == "__prepare__":
# These attributes are internal to mypyc/CPython, and the user has
# little control over them.
if attr_name in ("__prepare__", "GeneratorExit", "StopIteration"):
# These attributes are internal to mypyc/CPython, and/or accessed
# implicitly in generated code. The user has little control over
# them.
ann = None
elif attr_name:
ann = f'Get non-native attribute "{attr_name}".'
else:
ann = "Dynamic attribute lookup."
elif name == "PyObject_SetAttr":
attr_name = get_str_literal(op.args[1])
if attr_name == "__mypyc_attrs__":
# This is set implicitly and can't be avoided.
ann = None
elif attr_name:
ann = f'Set non-native attribute "{attr_name}".'
else:
ann = "Dynamic attribute set."
elif name == "PyObject_VectorcallMethod":
method_name = get_str_literal(op.args[0])
if method_name:
ann = f'Call non-native method "{method_name}".'
ann = f'Call non-native method "{method_name}" (it may be defined in a non-native class, or decorated).'
else:
ann = "Dynamic method call."
elif name in op_hints:
Expand Down Expand Up @@ -218,10 +249,12 @@ def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Anno
class ASTAnnotateVisitor(TraverserVisitor):
"""Generate annotations from mypy AST and inferred types."""

def __init__(self, type_map: dict[Expression, Type]) -> None:
def __init__(self, type_map: dict[Expression, Type], mapper: Mapper) -> None:
self.anns: dict[int, list[Annotation]] = {}
self.ignored_lines: set[int] = set()
self.func_depth = 0
self.type_map = type_map
self.mapper = mapper

def visit_func_def(self, o: FuncDef, /) -> None:
if self.func_depth > 0:
Expand All @@ -235,21 +268,84 @@ def visit_func_def(self, o: FuncDef, /) -> None:
self.func_depth -= 1

def visit_for_stmt(self, o: ForStmt, /) -> None:
typ = self.get_type(o.expr)
if isinstance(typ, AnyType):
self.annotate(o.expr, 'For loop uses generic operations (iterable has type "Any").')
elif isinstance(typ, Instance) and typ.type.fullname in (
"typing.Iterable",
"typing.Iterator",
"typing.Sequence",
"typing.MutableSequence",
):
self.annotate(
o.expr,
f'For loop uses generic operations (iterable has the abstract type "{typ.type.fullname}").',
)
self.check_iteration([o.expr], "For loop")
super().visit_for_stmt(o)

def visit_dictionary_comprehension(self, o: DictionaryComprehension, /) -> None:
    """Check the iterables of a dict comprehension, then visit its children."""
    iterables = o.sequences
    self.check_iteration(iterables, "Comprehension")
    super().visit_dictionary_comprehension(o)

def visit_generator_expr(self, o: GeneratorExpr, /) -> None:
    """Check the iterables of a generator expression, then visit its children."""
    iterables = o.sequences
    self.check_iteration(iterables, "Comprehension or generator")
    super().visit_generator_expr(o)

def check_iteration(self, expressions: list[Expression], kind: str) -> None:
    """Annotate iterables whose inferred type is "Any" or an abstract type.

    Each item of 'expressions' is looked up with self.get_type(). 'kind' names
    the iterating construct and is used as the prefix of the message.
    """
    abstract_names = (
        "typing.Iterable",
        "typing.Iterator",
        "typing.Sequence",
        "typing.MutableSequence",
    )
    for iterable in expressions:
        inferred = self.get_type(iterable)
        if isinstance(inferred, AnyType):
            message = f'{kind} uses generic operations (iterable has type "Any").'
        elif isinstance(inferred, Instance) and inferred.type.fullname in abstract_names:
            message = f'{kind} uses generic operations (iterable has the abstract type "{inferred.type.fullname}").'
        else:
            # Any other inferred type produces no annotation.
            continue
        self.annotate(iterable, message)

def visit_class_def(self, o: ClassDef, /) -> None:
    """Visit a class, then mark lines of a top-level class to be ignored.

    Nothing is marked when the class is nested inside a function
    (self.func_depth is non-zero).
    """
    super().visit_class_def(o)
    if self.func_depth != 0:
        return

    # Don't complain about base classes at top level
    self.ignored_lines.update(base.line for base in o.base_type_exprs)

    for stmt in o.defs.body:
        if isinstance(stmt, AssignmentStmt):
            # Don't complain about attribute initializers
            self.ignored_lines.add(stmt.line)
        elif isinstance(stmt, Decorator):
            # Don't complain about decorator definitions that generate some
            # dynamic operations. This is a bit heavy-handed.
            self.ignored_lines.add(stmt.func.line)

def visit_with_stmt(self, o: WithStmt, /) -> None:
    """Annotate 'with' items that call a function decorated with @contextmanager.

    Only items that are calls through a reference to a Decorator node are
    considered. The statement's children are visited afterwards.
    """
    for item in o.expr:
        if not (isinstance(item, CallExpr) and isinstance(item.callee, RefExpr)):
            continue
        target = item.callee.node
        if not isinstance(target, Decorator):
            continue
        uses_contextmanager = any(
            isinstance(dec, RefExpr)
            and dec.node
            and dec.node.fullname == "contextlib.contextmanager"
            for dec in target.decorators
        )
        if uses_contextmanager:
            self.annotate(
                item,
                f'"{target.name}" uses @contextmanager, which is slow '
                "in compiled code. Use a native class with "
                '"__enter__" and "__exit__" methods instead.',
                priority=3,
            )
    super().visit_with_stmt(o)

def visit_assignment_stmt(self, o: AssignmentStmt, /) -> None:
    """Mark module-level special-form assignments as ignored, then visit children.

    An assignment counts as a special form when it is an alias definition or
    when its analyzed right-hand side is a TypeVar, NamedTuple, TypedDict or
    NewType expression. Only assignments outside functions are checked.
    """
    if self.func_depth == 0:
        rhs = o.rvalue
        # Calls, index and operator expressions carry the special form on
        # their 'analyzed' attribute; other expressions are checked directly.
        analyzed: Expression | None = (
            rhs.analyzed if isinstance(rhs, (CallExpr, IndexExpr, OpExpr)) else rhs
        )
        special_form_types = (TypeVarExpr, NamedTupleExpr, TypedDictExpr, NewTypeExpr)
        if o.is_alias_def or isinstance(analyzed, special_form_types):
            # TODO: Ignore all lines if multi-line
            self.ignored_lines.add(o.line)
    super().visit_assignment_stmt(o)

def visit_name_expr(self, o: NameExpr, /) -> None:
if ann := stdlib_hints.get(o.fullname):
self.annotate(o, ann)
Expand All @@ -268,6 +364,30 @@ def visit_call_expr(self, o: CallExpr, /) -> None:
):
arg = o.args[1]
self.check_isinstance_arg(arg)
elif isinstance(o.callee, RefExpr) and isinstance(o.callee.node, TypeInfo):
info = o.callee.node
class_ir = self.mapper.type_to_ir.get(info)
if (class_ir and not class_ir.is_ext_class) or (
class_ir is None and not info.fullname.startswith("builtins.")
):
self.annotate(
o, f'Creating an instance of non-native class "{info.name}" ' + "is slow.", 2
)
elif class_ir and class_ir.is_augmented:
self.annotate(
o,
f'Class "{info.name}" is only partially native, and '
+ "constructing an instance is slow.",
2,
)
elif isinstance(o.callee, RefExpr) and isinstance(o.callee.node, Decorator):
decorator = o.callee.node
if self.mapper.is_native_ref_expr(o.callee):
self.annotate(
o,
f'Calling a decorated function ("{decorator.name}") is inefficient, even if it\'s native.',
2,
)

def check_isinstance_arg(self, arg: Expression) -> None:
if isinstance(arg, RefExpr):
Expand All @@ -287,9 +407,9 @@ def visit_lambda_expr(self, o: LambdaExpr, /) -> None:
)
super().visit_lambda_expr(o)

def annotate(self, o: Node, ann: str | Annotation) -> None:
def annotate(self, o: Node, ann: str | Annotation, priority: int = 1) -> None:
if isinstance(ann, str):
ann = Annotation(ann)
ann = Annotation(ann, priority=priority)
self.anns.setdefault(o.line, []).append(ann)

def get_type(self, e: Expression) -> ProperType:
Expand Down
4 changes: 2 additions & 2 deletions mypyc/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def generate_c(
print(f"Parsed and typechecked in {t1 - t0:.3f}s")

errors = Errors(options)
modules, ctext = emitmodule.compile_modules_to_c(
modules, ctext, mapper = emitmodule.compile_modules_to_c(
result, compiler_options=compiler_options, errors=errors, groups=groups
)
t2 = time.time()
Expand All @@ -255,7 +255,7 @@ def generate_c(
print(f"Compiled to C in {t2 - t1:.3f}s")

if options.mypyc_annotation_file:
generate_annotated_html(options.mypyc_annotation_file, result, modules)
generate_annotated_html(options.mypyc_annotation_file, result, modules, mapper)

return ctext, "\n".join(format_modules(modules))

Expand Down
6 changes: 3 additions & 3 deletions mypyc/codegen/emitmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def load_scc_from_cache(

def compile_modules_to_c(
result: BuildResult, compiler_options: CompilerOptions, errors: Errors, groups: Groups
) -> tuple[ModuleIRs, list[FileContents]]:
) -> tuple[ModuleIRs, list[FileContents], Mapper]:
"""Compile Python module(s) to the source of Python C extension modules.

This generates the source code for the "shared library" module
Expand Down Expand Up @@ -427,12 +427,12 @@ def compile_modules_to_c(

modules = compile_modules_to_ir(result, mapper, compiler_options, errors)
if errors.num_errors > 0:
return {}, []
return {}, [], Mapper({})

ctext = compile_ir_to_c(groups, modules, result, mapper, compiler_options)
write_cache(modules, result, group_map, ctext)

return modules, [ctext[name] for _, name in groups]
return modules, [ctext[name] for _, name in groups], mapper


def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None:
Expand Down
Loading