Skip to content

Commit 6cc3462

Browse files
Michael0x2a authored and gvanrossum committed
Modify cache file formats to shrink overall cache size (#2108)
* Compress usually-false flags when serializing. This commit modifies several mypy nodes that tend to contain a large number of usually-false boolean flags to store a compressed version of those flags. More precisely, rather than storing each flag and its boolean value as a key-value pair in the output JSON, we now store a list of the flags that are known to be true at time of serialization. When deserializing, we iterate over the list of flags and set each attribute to True, overriding the default value of False. This does not result in any significant speedups, but does end up reducing the size of the cache by a non-trivial factor.
* Stop serializing Argument.type_annotation. This commit modifies the serialization logic of Argument to stop saving the type_annotation data. After some investigation, it appears as if that data is never actually used once the cache data is loaded by mypy, and removing it would help shrink the cache size by a respectable margin.
* Stop caching all arguments in serialized FuncDefs. This commit modifies the serialization logic and some of the surrounding code for FuncDefs so we no longer serialize the full argument list -- instead, we just stick to serializing the argument names and the argument kinds. As with the other commits, this does not really improve speed, but does end up shrinking the cache size by a fair amount. It would have been nice to avoid storing the arg kinds and arg names altogether since the information can usually be found within the corresponding callable type, but unfortunately, a FuncDef is not guaranteed to actually have a type in all instances -- sometimes, the type is None. This is a problem since we need the argument names and kinds to be able to infer a fallback type later on in mypy.
1 parent 590c15f commit 6cc3462

File tree

2 files changed

+60
-59
lines changed

2 files changed

+60
-59
lines changed

mypy/fixup.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,6 @@ def visit_func_def(self, func: FuncDef) -> None:
9090
func.info = self.current_info
9191
if func.type is not None:
9292
func.type.accept(self.type_fixer)
93-
for arg in func.arguments:
94-
if arg.type_annotation is not None:
95-
arg.type_annotation.accept(self.type_fixer)
9693

9794
def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None:
9895
if self.current_info is not None:

mypy/nodes.py

Lines changed: 60 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -387,11 +387,11 @@ def set_line(self, target: Union[Token, Node, int]) -> Node:
387387
self.initialization_statement.lvalues[0].set_line(self.line)
388388

389389
def serialize(self) -> JsonDict:
390+
# Note: we are deliberately not saving the type annotation since
391+
# it is not used by later stages of mypy.
390392
data = {'.class': 'Argument',
391393
'kind': self.kind,
392394
'variable': self.variable.serialize(),
393-
'type_annotation': (None if self.type_annotation is None
394-
else self.type_annotation.serialize()),
395395
} # type: JsonDict
396396
# TODO: initializer?
397397
return data
@@ -400,14 +400,15 @@ def serialize(self) -> JsonDict:
400400
def deserialize(cls, data: JsonDict) -> 'Argument':
401401
assert data['.class'] == 'Argument'
402402
return Argument(Var.deserialize(data['variable']),
403-
(None if data.get('type_annotation') is None
404-
else mypy.types.Type.deserialize(data['type_annotation'])),
403+
None,
405404
None, # TODO: initializer?
406405
kind=data['kind'])
407406

408407

409408
class FuncItem(FuncBase):
410409
arguments = [] # type: List[Argument]
410+
arg_names = [] # type: List[str]
411+
arg_kinds = [] # type: List[int]
411412
# Minimum number of arguments
412413
min_args = 0
413414
# Maximum number of positional arguments, -1 if no explicit limit (*args not included)
@@ -423,11 +424,17 @@ class FuncItem(FuncBase):
423424
# Variants of function with type variables with values expanded
424425
expanded = None # type: List[FuncItem]
425426

427+
FLAGS = [
428+
'is_overload', 'is_generator', 'is_coroutine', 'is_awaitable_coroutine',
429+
'is_static', 'is_class',
430+
]
431+
426432
def __init__(self, arguments: List[Argument], body: 'Block',
427433
typ: 'mypy.types.FunctionLike' = None) -> None:
428434
self.arguments = arguments
429-
arg_kinds = [arg.kind for arg in self.arguments]
430-
self.max_pos = arg_kinds.count(ARG_POS) + arg_kinds.count(ARG_OPT)
435+
self.arg_names = [arg.variable.name() for arg in self.arguments]
436+
self.arg_kinds = [arg.kind for arg in self.arguments]
437+
self.max_pos = self.arg_kinds.count(ARG_POS) + self.arg_kinds.count(ARG_OPT)
431438
self.body = body
432439
self.type = typ
433440
self.expanded = []
@@ -462,6 +469,10 @@ class FuncDef(FuncItem, Statement):
462469
is_property = False
463470
original_def = None # type: Union[None, FuncDef, Var] # Original conditional definition
464471

472+
FLAGS = FuncItem.FLAGS + [
473+
'is_decorated', 'is_conditional', 'is_abstract', 'is_property'
474+
]
475+
465476
def __init__(self,
466477
name: str, # Function name
467478
arguments: List[Argument],
@@ -480,20 +491,19 @@ def is_constructor(self) -> bool:
480491
return self.info is not None and self._name == '__init__'
481492

482493
def serialize(self) -> JsonDict:
494+
# We're deliberately omitting arguments and storing only arg_names and
495+
# arg_kinds for space-saving reasons (arguments is not used in later
496+
# stages of mypy).
497+
# TODO: After a FuncDef is deserialized, the only time we use `arg_names`
498+
# and `arg_kinds` is when `type` is None and we need to infer a type. Can
499+
# we store the inferred type ahead of time?
483500
return {'.class': 'FuncDef',
484501
'name': self._name,
485502
'fullname': self._fullname,
486-
'arguments': [a.serialize() for a in self.arguments],
503+
'arg_names': self.arg_names,
504+
'arg_kinds': self.arg_kinds,
487505
'type': None if self.type is None else self.type.serialize(),
488-
'is_property': self.is_property,
489-
'is_overload': self.is_overload,
490-
'is_generator': self.is_generator,
491-
'is_coroutine': self.is_coroutine,
492-
'is_static': self.is_static,
493-
'is_class': self.is_class,
494-
'is_decorated': self.is_decorated,
495-
'is_conditional': self.is_conditional,
496-
'is_abstract': self.is_abstract,
506+
'flags': get_flags(self, FuncDef.FLAGS),
497507
# TODO: Do we need expanded, original_def?
498508
}
499509

@@ -502,21 +512,19 @@ def deserialize(cls, data: JsonDict) -> 'FuncDef':
502512
assert data['.class'] == 'FuncDef'
503513
body = Block([])
504514
ret = FuncDef(data['name'],
505-
[Argument.deserialize(a) for a in data['arguments']],
515+
[],
506516
body,
507517
(None if data['type'] is None
508518
else mypy.types.FunctionLike.deserialize(data['type'])))
509519
ret._fullname = data['fullname']
510-
ret.is_property = data['is_property']
511-
ret.is_overload = data['is_overload']
512-
ret.is_generator = data['is_generator']
513-
ret.is_coroutine = data['is_coroutine']
514-
ret.is_static = data['is_static']
515-
ret.is_class = data['is_class']
516-
ret.is_decorated = data['is_decorated']
517-
ret.is_conditional = data['is_conditional']
518-
ret.is_abstract = data['is_abstract']
520+
set_flags(ret, data['flags'])
519521
# NOTE: ret.info is set in the fixup phase.
522+
ret.arg_names = data['arg_names']
523+
ret.arg_kinds = data['arg_kinds']
524+
# Mark these as 'None' so that future uses will trigger an error
525+
ret.arguments = None
526+
ret.max_pos = None
527+
ret.min_args = None
520528
return ret
521529

522530

@@ -587,6 +595,11 @@ class Var(SymbolNode, Statement):
587595
# parse for some reason (eg a silenced module)
588596
is_suppressed_import = False
589597

598+
FLAGS = [
599+
'is_self', 'is_ready', 'is_initialized_in_class', 'is_staticmethod',
600+
'is_classmethod', 'is_property', 'is_settable_property', 'is_suppressed_import'
601+
]
602+
590603
def __init__(self, name: str, type: 'mypy.types.Type' = None) -> None:
591604
self._name = name
592605
self.type = type
@@ -610,13 +623,7 @@ def serialize(self) -> JsonDict:
610623
'name': self._name,
611624
'fullname': self._fullname,
612625
'type': None if self.type is None else self.type.serialize(),
613-
'is_self': self.is_self,
614-
'is_initialized_in_class': self.is_initialized_in_class,
615-
'is_staticmethod': self.is_staticmethod,
616-
'is_classmethod': self.is_classmethod,
617-
'is_property': self.is_property,
618-
'is_settable_property': self.is_settable_property,
619-
'is_suppressed_import': self.is_suppressed_import,
626+
'flags': get_flags(self, Var.FLAGS),
620627
} # type: JsonDict
621628
return data
622629

@@ -627,13 +634,7 @@ def deserialize(cls, data: JsonDict) -> 'Var':
627634
type = None if data['type'] is None else mypy.types.Type.deserialize(data['type'])
628635
v = Var(name, type)
629636
v._fullname = data['fullname']
630-
v.is_self = data['is_self']
631-
v.is_initialized_in_class = data['is_initialized_in_class']
632-
v.is_staticmethod = data['is_staticmethod']
633-
v.is_classmethod = data['is_classmethod']
634-
v.is_property = data['is_property']
635-
v.is_settable_property = data['is_settable_property']
636-
v.is_suppressed_import = data['is_suppressed_import']
637+
set_flags(v, data['flags'])
637638
return v
638639

639640

@@ -1837,6 +1838,11 @@ class is generic then it will be a type constructor of higher kind.
18371838
# Alternative to fullname() for 'anonymous' classes.
18381839
alt_fullname = None # type: Optional[str]
18391840

1841+
FLAGS = [
1842+
'is_abstract', 'is_enum', 'fallback_to_any', 'is_named_tuple',
1843+
'is_newtype', 'is_dummy'
1844+
]
1845+
18401846
def __init__(self, names: 'SymbolTable', defn: ClassDef, module_name: str) -> None:
18411847
"""Initialize a TypeInfo."""
18421848
self.names = names
@@ -2000,16 +2006,12 @@ def serialize(self) -> Union[str, JsonDict]:
20002006
'alt_fullname': self.alt_fullname,
20012007
'names': self.names.serialize(self.alt_fullname or self.fullname()),
20022008
'defn': self.defn.serialize(),
2003-
'is_abstract': self.is_abstract,
20042009
'abstract_attributes': self.abstract_attributes,
2005-
'is_enum': self.is_enum,
2006-
'fallback_to_any': self.fallback_to_any,
20072010
'type_vars': self.type_vars,
20082011
'bases': [b.serialize() for b in self.bases],
20092012
'_promote': None if self._promote is None else self._promote.serialize(),
20102013
'tuple_type': None if self.tuple_type is None else self.tuple_type.serialize(),
2011-
'is_named_tuple': self.is_named_tuple,
2012-
'is_newtype': self.is_newtype,
2014+
'flags': get_flags(self, TypeInfo.FLAGS),
20132015
}
20142016
return data
20152017

@@ -2022,18 +2024,14 @@ def deserialize(cls, data: JsonDict) -> 'TypeInfo':
20222024
ti._fullname = data['fullname']
20232025
ti.alt_fullname = data['alt_fullname']
20242026
# TODO: Is there a reason to reconstruct ti.subtypes?
2025-
ti.is_abstract = data['is_abstract']
20262027
ti.abstract_attributes = data['abstract_attributes']
2027-
ti.is_enum = data['is_enum']
2028-
ti.fallback_to_any = data['fallback_to_any']
20292028
ti.type_vars = data['type_vars']
20302029
ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']]
20312030
ti._promote = (None if data['_promote'] is None
20322031
else mypy.types.Type.deserialize(data['_promote']))
20332032
ti.tuple_type = (None if data['tuple_type'] is None
20342033
else mypy.types.TupleType.deserialize(data['tuple_type']))
2035-
ti.is_named_tuple = data['is_named_tuple']
2036-
ti.is_newtype = data['is_newtype']
2034+
set_flags(ti, data['flags'])
20372035
return ti
20382036

20392037

@@ -2215,14 +2213,11 @@ def function_type(func: FuncBase, fallback: 'mypy.types.Instance') -> 'mypy.type
22152213
name = func.name()
22162214
if name:
22172215
name = '"{}"'.format(name)
2218-
names = [] # type: List[str]
2219-
for arg in fdef.arguments:
2220-
names.append(arg.variable.name())
22212216

22222217
return mypy.types.CallableType(
2223-
[mypy.types.AnyType()] * len(fdef.arguments),
2224-
[arg.kind for arg in fdef.arguments],
2225-
names,
2218+
[mypy.types.AnyType()] * len(fdef.arg_names),
2219+
fdef.arg_kinds,
2220+
fdef.arg_names,
22262221
mypy.types.AnyType(),
22272222
fallback,
22282223
name,
@@ -2292,3 +2287,12 @@ def merge(seqs: List[List[TypeInfo]]) -> List[TypeInfo]:
22922287
for s in seqs:
22932288
if s[0] is head:
22942289
del s[0]
2290+
2291+
2292+
def get_flags(node: Node, names: List[str]) -> List[str]:
    """Return the subset of `names` whose attribute on `node` is truthy.

    Used when serializing: instead of storing every boolean flag as a
    key/value pair, only the names of the flags that are currently set
    are kept. The result preserves the order of `names`.

    Raises AttributeError if `node` lacks one of the named attributes,
    since getattr() is called without a default.
    """
    return [name for name in names if getattr(node, name)]
2294+
2295+
2296+
def set_flags(node: Node, flags: List[str]) -> None:
    """Set every attribute named in `flags` to True on `node`.

    Counterpart of get_flags() for deserialization: attributes that are
    not listed are left untouched, so they keep whatever value `node`
    already had.
    """
    for name in flags:
        setattr(node, name, True)

0 commit comments

Comments
 (0)