Skip to content

Commit 4352a44

Browse files
authored
Caching for fine-grained incremental mode (#4483)
1 parent 26b51e5 commit 4352a44

16 files changed

+267
-28
lines changed

mypy/build.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,17 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
11311131
if not stat.S_ISREG(st.st_mode):
11321132
manager.log('Metadata abandoned for {}: file {} does not exist'.format(id, path))
11331133
return None
1134+
1135+
# When we are using a fine-grained cache, we want our initial
1136+
# build() to load all of the cache information and then do a
1137+
# fine-grained incremental update to catch anything that has
1138+
# changed since the cache was generated. We *don't* want to do a
1139+
# coarse-grained incremental rebuild, so we accept the cache
1140+
# metadata even if it doesn't match the source file.
1141+
if manager.options.use_fine_grained_cache:
1142+
manager.log('Using potentially stale metadata for {}'.format(id))
1143+
return meta
1144+
11341145
size = st.st_size
11351146
if size != meta.size:
11361147
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
@@ -2383,6 +2394,14 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
23832394
manager.log("Processing SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
23842395
process_stale_scc(graph, scc, manager)
23852396

2397+
# If we are running in fine-grained incremental mode with caching,
2398+
# we always process fresh SCCs so that we have all of the symbol
2399+
# tables and fine-grained dependencies available.
2400+
if manager.options.use_fine_grained_cache:
2401+
for prev_scc in fresh_scc_queue:
2402+
process_fresh_scc(graph, prev_scc, manager)
2403+
fresh_scc_queue = []
2404+
23862405
sccs_left = len(fresh_scc_queue)
23872406
nodes_left = sum(len(scc) for scc in fresh_scc_queue)
23882407
manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left)
@@ -2569,7 +2588,7 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> No
25692588
graph[id].transitive_error = True
25702589
for id in stale:
25712590
graph[id].finish_passes()
2572-
if manager.options.cache_fine_grained:
2591+
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
25732592
graph[id].compute_fine_grained_deps()
25742593
graph[id].generate_unused_ignore_notes()
25752594
manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)

mypy/dmypy_server.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from mypy.dmypy_util import STATUS_FILE, receive
2525
from mypy.gclogger import GcLogger
2626
from mypy.fscache import FileSystemCache
27-
from mypy.fswatcher import FileSystemWatcher
27+
from mypy.fswatcher import FileSystemWatcher, FileData
2828

2929

3030
def daemonize(func: Callable[[], None], log_file: Optional[str] = None) -> int:
@@ -99,13 +99,18 @@ def __init__(self, flags: List[str]) -> None:
9999
sys.exit("dmypy: start/restart should not disable incremental mode")
100100
if options.quick_and_dirty:
101101
sys.exit("dmypy: start/restart should not specify quick_and_dirty mode")
102+
if options.use_fine_grained_cache and not options.fine_grained_incremental:
103+
sys.exit("dmypy: fine-grained cache can only be used in experimental mode")
102104
self.options = options
103105
if os.path.isfile(STATUS_FILE):
104106
os.unlink(STATUS_FILE)
105107
if self.fine_grained:
106108
options.incremental = True
107109
options.show_traceback = True
108-
options.cache_dir = os.devnull
110+
if options.use_fine_grained_cache:
111+
options.cache_fine_grained = True # set this so that cache options match
112+
else:
113+
options.cache_dir = os.devnull
109114

110115
def serve(self) -> None:
111116
"""Serve requests, synchronously (no thread or fork)."""
@@ -263,11 +268,29 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
263268
manager = result.manager
264269
graph = result.graph
265270
self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph)
266-
status = 1 if messages else 0
267-
self.previous_messages = messages[:]
268271
self.fine_grained_initialized = True
269272
self.previous_sources = sources
270273
self.fscache.flush()
274+
275+
# If we are using the fine-grained cache, build hasn't actually done
276+
# the typechecking on the updated files yet.
277+
# Run a fine-grained update starting from the cached data
278+
if self.options.use_fine_grained_cache:
279+
# Pull times and hashes out of the saved_cache and stick them into
280+
# the fswatcher, so we pick up the changes.
281+
for meta, mypyfile, type_map in manager.saved_cache.values():
282+
if meta.mtime is None: continue
283+
self.fswatcher.set_file_data(
284+
meta.path,
285+
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))
286+
287+
# Run an update
288+
changed = self.find_changed(sources)
289+
if changed:
290+
messages = self.fine_grained_manager.update(changed)
291+
292+
status = 1 if messages else 0
293+
self.previous_messages = messages[:]
271294
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}
272295

273296
def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:

mypy/fswatcher.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ def __init__(self, fs: FileSystemCache) -> None:
3636
def paths(self) -> AbstractSet[str]:
3737
return self._paths
3838

39+
def set_file_data(self, path: str, data: FileData) -> None:
    """Record `data` as the known file state for `path`.

    Any entry previously stored for the same path is overwritten.
    """
    self._file_data[path] = data
41+
3942
def add_watched_paths(self, paths: Iterable[str]) -> None:
4043
for path in paths:
4144
if path not in self._paths:

mypy/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,8 @@ def add_invertible_flag(flag: str,
395395
if server_options:
396396
parser.add_argument('--experimental', action='store_true', dest='fine_grained_incremental',
397397
help="enable fine-grained incremental mode")
398+
parser.add_argument('--use-fine-grained-cache', action='store_true',
399+
help="use the cache in fine-grained incremental mode")
398400

399401
report_group = parser.add_argument_group(
400402
title='report generation',

mypy/nodes.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ class MypyFile(SymbolNode):
203203
ignored_lines = None # type: Set[int]
204204
# Is this file represented by a stub file (.pyi)?
205205
is_stub = False
206+
# Is this loaded from the cache and thus missing the actual body of the file?
207+
is_cache_skeleton = False
206208

207209
def __init__(self,
208210
defs: List[Statement],
@@ -249,6 +251,7 @@ def deserialize(cls, data: JsonDict) -> 'MypyFile':
249251
tree.names = SymbolTable.deserialize(data['names'])
250252
tree.is_stub = data['is_stub']
251253
tree.path = data['path']
254+
tree.is_cache_skeleton = True
252255
return tree
253256

254257

mypy/options.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def __init__(self) -> None:
144144
self.skip_version_check = False
145145
self.fine_grained_incremental = False
146146
self.cache_fine_grained = False
147+
self.use_fine_grained_cache = False
147148

148149
# Paths of user plugins
149150
self.plugins = [] # type: List[str]

mypy/server/update.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ def __init__(self,
170170
self.blocking_error = None # type: Optional[Tuple[str, str]]
171171
# Modules that we haven't processed yet but that are known to be stale.
172172
self.stale = [] # type: List[Tuple[str, str]]
173+
# Disable the cache so that load_graph doesn't try going back to disk
174+
# for the cache. This is kind of a hack and it might be better to have
175+
# this directly reflected in load_graph's interface.
176+
self.options.cache_dir = os.devnull
173177
mark_all_meta_as_memory_only(graph, manager)
174178
manager.saved_cache = preserve_full_cache(graph, manager)
175179
self.type_maps = extract_type_maps(graph)
@@ -281,9 +285,10 @@ def update_single(self, module: str, path: str) -> Tuple[List[str],
281285
print('triggered:', sorted(filtered))
282286
self.triggered.extend(triggered | self.previous_targets_with_errors)
283287
collect_dependencies({module: tree}, self.deps, graph)
284-
propagate_changes_using_dependencies(manager, graph, self.deps, triggered,
285-
{module},
286-
self.previous_targets_with_errors)
288+
remaining += propagate_changes_using_dependencies(
289+
manager, graph, self.deps, triggered,
290+
{module},
291+
self.previous_targets_with_errors)
287292

288293
# Preserve state needed for the next update.
289294
self.previous_targets_with_errors = manager.errors.targets()
@@ -318,6 +323,7 @@ def mark_all_meta_as_memory_only(graph: Dict[str, State],
318323
def get_all_dependencies(manager: BuildManager, graph: Dict[str, State],
319324
options: Options) -> Dict[str, Set[str]]:
320325
"""Return the fine-grained dependency map for an entire build."""
326+
# Deps for each module were computed during build() or loaded from the cache.
321327
deps = {} # type: Dict[str, Set[str]]
322328
collect_dependencies(manager.modules, deps, graph)
323329
return deps
@@ -374,7 +380,7 @@ def update_single_isolated(module: str,
374380
sources = get_sources(previous_modules, [(module, path)])
375381
invalidate_stale_cache_entries(manager.saved_cache, [(module, path)])
376382

377-
manager.missing_modules = set()
383+
manager.missing_modules.clear()
378384
try:
379385
graph = load_graph(sources, manager)
380386
except CompileError as err:
@@ -441,6 +447,7 @@ def update_single_isolated(module: str,
441447
# Perform type checking.
442448
state.type_check_first_pass()
443449
state.type_check_second_pass()
450+
state.compute_fine_grained_deps()
444451
state.finish_passes()
445452
# TODO: state.write_cache()?
446453
# TODO: state.mark_as_rechecked()?
@@ -492,7 +499,8 @@ def delete_module(module_id: str,
492499
# TODO: Remove deps for the module (this only affects memory use, not correctness)
493500
assert module_id not in graph
494501
new_graph = graph.copy()
495-
del manager.modules[module_id]
502+
if module_id in manager.modules:
503+
del manager.modules[module_id]
496504
if module_id in manager.saved_cache:
497505
del manager.saved_cache[module_id]
498506
components = module_id.split('.')
@@ -654,7 +662,6 @@ def collect_dependencies(new_modules: Mapping[str, Optional[MypyFile]],
654662
for id, node in new_modules.items():
655663
if node is None:
656664
continue
657-
graph[id].compute_fine_grained_deps()
658665
for trigger, targets in graph[id].fine_grained_deps.items():
659666
deps.setdefault(trigger, set()).update(targets)
660667

@@ -711,9 +718,15 @@ def propagate_changes_using_dependencies(
711718
deps: Dict[str, Set[str]],
712719
triggered: Set[str],
713720
up_to_date_modules: Set[str],
714-
targets_with_errors: Set[str]) -> None:
721+
targets_with_errors: Set[str]) -> List[Tuple[str, str]]:
722+
"""Transitively rechecks targets based on triggers and the dependency map.
723+
724+
Returns a list of (module id, path) tuples representing modules that contain
725+
a target that needs to be reprocessed but that has not been parsed yet."""
726+
715727
# TODO: Multiple type checking passes
716728
num_iter = 0
729+
remaining_modules = []
717730

718731
# Propagate changes until nothing visible has changed during the last
719732
# iteration.
@@ -737,7 +750,13 @@ def propagate_changes_using_dependencies(
737750
# TODO: Preserve order (set is not optimal)
738751
for id, nodes in sorted(todo.items(), key=lambda x: x[0]):
739752
assert id not in up_to_date_modules
740-
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
753+
if manager.modules[id].is_cache_skeleton:
754+
# We have only loaded the cache for this file, not the actual file,
755+
# so we can't access the nodes to reprocess.
756+
# Add it to the queue of files that need to be processed fully.
757+
remaining_modules.append((id, manager.modules[id].path))
758+
else:
759+
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
741760
# Changes elsewhere may require us to reprocess modules that were
742761
# previously considered up to date. For example, there may be a
743762
# dependency loop that loops back to an originally processed module.
@@ -746,6 +765,8 @@ def propagate_changes_using_dependencies(
746765
if DEBUG:
747766
print('triggered:', list(triggered))
748767

768+
return remaining_modules
769+
749770

750771
def find_targets_recursive(
751772
triggers: Set[str],
@@ -993,4 +1014,6 @@ def lookup_target(modules: Dict[str, MypyFile], target: str) -> List[DeferredNod
9931014

9941015

9951016
def extract_type_maps(graph: Graph) -> Dict[str, Dict[Expression, Type]]:
    """Map each module id in `graph` to its state's type map.

    States whose `tree` is falsy are left out. The result is used to
    export information used only by the testmerge harness.
    """
    type_maps = {}
    for module_id, state in graph.items():
        if not state.tree:
            continue
        type_maps[module_id] = state.type_map()
    return type_maps

mypy/test/testdmypy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) ->
119119
server_options = [] # type: List[str]
120120
if 'fine-grained' in testcase.file:
121121
server_options.append('--experimental')
122+
options.fine_grained_incremental = True
122123
self.server = dmypy_server.Server(server_options) # TODO: Fix ugly API
123124
self.server.options = options
124125

mypy/test/testfinegrained.py

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from mypy.test.testtypegen import ignore_node
3131
from mypy.types import TypeStrVisitor, Type
3232
from mypy.util import short_type
33+
import pytest # type: ignore # no pytest in typeshed
3334

3435

3536
class FineGrainedSuite(DataSuite):
@@ -41,17 +42,44 @@ class FineGrainedSuite(DataSuite):
4142
]
4243
base_path = test_temp_dir
4344
optional_out = True
45+
# Whether to use the fine-grained cache in the testing. This is overridden
46+
# by a trivial subclass to produce a suite that uses the cache.
47+
use_cache = False
48+
49+
# Decide whether to skip the test. This could have been structured
50+
# as a filter() classmethod also, but we want the tests reported
51+
# as skipped, not just elided.
52+
def should_skip(self, testcase: DataDrivenTestCase) -> bool:
    """Return True if `testcase` should be skipped in the current cache mode.

    Without the cache, only cases whose name ends in "-skip-nocache" are
    skipped. With the cache, cases whose name ends in "-skip-cache" are
    skipped, as are cases whose first expected output line is not '=='.
    """
    if not self.use_cache:
        return testcase.name.endswith("-skip-nocache")

    if testcase.name.endswith("-skip-cache"):
        return True

    # TODO: Caching mode does not yet properly support starting from
    # cached states that have errors in them.
    expected = testcase.output
    return bool(expected and expected[0] != '==')
4465

4566
def run_case(self, testcase: DataDrivenTestCase) -> None:
67+
if self.should_skip(testcase):
68+
pytest.skip()
69+
return
70+
4671
main_src = '\n'.join(testcase.input)
4772
sources_override = self.parse_sources(main_src)
48-
messages, manager, graph = self.build(main_src, testcase, sources_override)
49-
73+
messages, manager, graph = self.build(main_src, testcase, sources_override,
74+
build_cache=self.use_cache,
75+
enable_cache=self.use_cache)
5076
a = []
5177
if messages:
5278
a.extend(normalize_messages(messages))
5379

54-
fine_grained_manager = FineGrainedBuildManager(manager, graph)
80+
fine_grained_manager = None
81+
if not self.use_cache:
82+
fine_grained_manager = FineGrainedBuildManager(manager, graph)
5583

5684
steps = testcase.find_steps()
5785
all_triggered = []
@@ -70,6 +98,14 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
7098
modules = [(module, path)
7199
for module, path in sources_override
72100
if any(m == module for m, _ in modules)]
101+
102+
# If this is the second iteration and we are using a
103+
# cache, now we need to set it up
104+
if fine_grained_manager is None:
105+
messages, manager, graph = self.build(main_src, testcase, sources_override,
106+
build_cache=False, enable_cache=True)
107+
fine_grained_manager = FineGrainedBuildManager(manager, graph)
108+
73109
new_messages = fine_grained_manager.update(modules)
74110
all_triggered.append(fine_grained_manager.triggered)
75111
new_messages = normalize_messages(new_messages)
@@ -82,8 +118,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
82118

83119
assert_string_arrays_equal(
84120
testcase.output, a,
85-
'Invalid output ({}, line {})'.format(testcase.file,
86-
testcase.line))
121+
'Invalid output ({}, line {})'.format(
122+
testcase.file, testcase.line))
87123

88124
if testcase.triggered:
89125
assert_string_arrays_equal(
@@ -95,14 +131,18 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
95131
def build(self,
96132
source: str,
97133
testcase: DataDrivenTestCase,
98-
sources_override: Optional[List[Tuple[str, str]]]) -> Tuple[List[str],
99-
BuildManager,
100-
Graph]:
134+
sources_override: Optional[List[Tuple[str, str]]],
135+
build_cache: bool,
136+
enable_cache: bool) -> Tuple[List[str], BuildManager, Graph]:
101137
# This handles things like '# flags: --foo'.
102138
options = parse_options(source, testcase, incremental_step=1)
103139
options.incremental = True
104140
options.use_builtins_fixtures = True
105141
options.show_traceback = True
142+
options.fine_grained_incremental = not build_cache
143+
options.use_fine_grained_cache = enable_cache and not build_cache
144+
options.cache_fine_grained = enable_cache
145+
106146
main_path = os.path.join(test_temp_dir, 'main')
107147
with open(main_path, 'w') as f:
108148
f.write(source)

mypy/test/testfinegrainedcache.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""Tests for fine-grained incremental checking using the cache.
2+
3+
All of the real code for this lives in testfinegrained.py.
4+
"""
5+
6+
# We can't "import FineGrainedSuite from ..." because that will cause pytest
7+
# to collect the non-caching tests when running this file.
8+
import mypy.test.testfinegrained
9+
10+
11+
class FineGrainedCacheSuite(mypy.test.testfinegrained.FineGrainedSuite):
    """Variant of FineGrainedSuite that sets `use_cache` to True."""

    use_cache = True

mypy/test/testmerge.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
9999
def build(self, source: str) -> Tuple[List[str], Optional[BuildManager], Dict[str, State]]:
100100
options = Options()
101101
options.incremental = True
102+
options.fine_grained_incremental = True
102103
options.use_builtins_fixtures = True
103104
options.show_traceback = True
104105
main_path = os.path.join(test_temp_dir, 'main')

0 commit comments

Comments
 (0)