Skip to content

Fix bug in cache updates and improve cache logging #3708

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 56 additions & 24 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,16 +860,17 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
"""
# TODO: May need to take more build options into account
meta_json, data_json = get_cache_names(id, path, manager)
manager.trace('Looking for {} {}'.format(id, data_json))
manager.trace('Looking for {} at {}'.format(id, meta_json))
if not os.path.exists(meta_json):
manager.trace('Could not load cache for {}: could not find {}'.format(id, meta_json))
manager.log('Could not load cache for {}: could not find {}'.format(id, meta_json))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the change from trace to log? And did you intend to change the trace above as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So a run with one -v will always show the decision around a particular cache file (every return None from this function, plus specific actions in validate_meta(). The above trace method is not a decision, promoting that would double the number of lines logged per cache file.

return None
with open(meta_json, 'r') as f:
meta_str = f.read()
manager.trace('Meta {} {}'.format(id, meta_str.rstrip()))
meta = json.loads(meta_str) # TODO: Errors
if not isinstance(meta, dict):
manager.trace('Could not load cache for {}: meta cache is not a dict'.format(id))
manager.log('Could not load cache for {}: meta cache is not a dict: {}'
.format(id, repr(meta)))
return None
m = CacheMeta(
meta.get('id'),
Expand All @@ -891,27 +892,36 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache
if (m.id != id or
m.mtime is None or m.size is None or
m.dependencies is None or m.data_mtime is None):
manager.trace('Metadata abandoned for {}: attributes are missing'.format(id))
manager.log('Metadata abandoned for {}: attributes are missing'.format(id))
return None

# Ignore cache if generated by an older mypy version.
if ((m.version_id != manager.version_id and not manager.options.skip_version_check)
or m.options is None
or len(m.dependencies) != len(m.dep_prios)):
manager.trace('Metadata abandoned for {}: new attributes are missing'.format(id))
manager.log('Metadata abandoned for {}: new attributes are missing'.format(id))
return None

# Ignore cache if (relevant) options aren't the same.
# Note that it's fine to mutilate cached_options since it's only used here.
cached_options = m.options
current_options = manager.options.clone_for_module(id).select_options_affecting_cache()
if manager.options.quick_and_dirty:
# In quick_and_dirty mode allow non-quick_and_dirty cache files.
cached_options['quick_and_dirty'] = True
if not cached_options.get('platform') and manager.options.skip_version_check:
# Older versions didn't write platform.
cached_options['platform'] = manager.options.platform
if manager.options.skip_version_check:
# When we're lax about version we're also lax about platform.
cached_options['platform'] = current_options['platform']
if 'debug_cache' in cached_options:
# Older versions included debug_cache, but it's silly to compare it.
del cached_options['debug_cache']
if cached_options != current_options:
manager.trace('Metadata abandoned for {}: options differ'.format(id))
manager.log('Metadata abandoned for {}: options differ'.format(id))
if manager.options.verbosity >= 2:
for key in sorted(set(cached_options) | set(current_options)):
if cached_options.get(key) != current_options.get(key):
manager.trace(' {}: {} != {}'
.format(key, cached_options.get(key), current_options.get(key)))
return None

return m
Expand Down Expand Up @@ -948,41 +958,63 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: str,
# we use cache data file mtime to propagate information about changes in the dependencies.

if meta is None:
manager.log('Metadata not found for {}'.format(id))
return None

# Check data_json; assume if its mtime matches it's good.
# TODO: stat() errors
data_mtime = getmtime(meta.data_json)
if data_mtime != meta.data_mtime:
manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
return None

# TODO: Share stat() outcome with find_module()
path = os.path.abspath(path)
st = manager.get_stat(path) # TODO: Errors
if st.st_size != meta.size:
size = st.st_size
if size != meta.size:
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
return None

if int(st.st_mtime) != meta.mtime or path != meta.path:
mtime = int(st.st_mtime)
if mtime != meta.mtime or path != meta.path:
with open(path, 'rb') as f:
source_hash = hashlib.md5(f.read()).hexdigest()
if source_hash != meta.hash:
manager.log('Metadata abandoned for {}: file {} has different hash'.format(id, path))
return None
else:
manager.log('Metadata ok for {}: file {} (match on path, size, hash)'.format(id, path))
# Optimization: update mtime and path (otherwise, this mismatch will reappear).
meta = meta._replace(mtime=int(st.st_mtime), path=path)
meta = meta._replace(mtime=mtime, path=path)
# Construct a dict we can pass to json.dumps() (compare to write_cache()).
meta_dict = {
'id': id,
'path': path,
'mtime': mtime,
'size': size,
'hash': source_hash,
'data_mtime': data_mtime,
'dependencies': meta.dependencies,
'suppressed': meta.suppressed,
'child_modules': meta.child_modules,
'options': (manager.options.clone_for_module(id)
.select_options_affecting_cache()),
'dep_prios': meta.dep_prios,
'interface_hash': meta.interface_hash,
'version_id': manager.version_id,
}
if manager.options.debug_cache:
meta_str = json.dumps(meta, indent=2, sort_keys=True)
meta_str = json.dumps(meta_dict, indent=2, sort_keys=True)
else:
meta_str = json.dumps(meta)
meta_str = json.dumps(meta_dict)
meta_json, _ = get_cache_names(id, path, manager)
manager.log('Updating mtime for {}: file {}, meta {}, mtime {}'
.format(id, path, meta_json, meta.mtime))
atomic_write(meta_json, meta_str) # Ignore errors, since this is just an optimization.
atomic_write(meta_json, meta_str, '\n') # Ignore errors, it's just an optimization.
return meta

# It's a match on (id, path, mtime/hash, size).
# Check data_json; assume if its mtime matches it's good.
# TODO: stat() errors
if getmtime(meta.data_json) != meta.data_mtime:
manager.log('Metadata abandoned for {}: data cache is modified'.format(id))
return None
manager.log('Found {} {} (metadata is fresh)'.format(id, meta.data_json))
# It's a match on (id, path, size, hash, mtime).
manager.log('Metadata fresh for {}: file {}'.format(id, path))
return meta


Expand Down Expand Up @@ -1096,7 +1128,7 @@ def write_cache(id: str, path: str, tree: MypyFile,
meta_str = json.dumps(meta, indent=2, sort_keys=True)
else:
meta_str = json.dumps(meta)
if not atomic_write(meta_json, meta_str):
if not atomic_write(meta_json, meta_str, '\n'):
# Most likely the error is the replace() call
# (see https://github.com/python/mypy/issues/3215).
# The next run will simply find the cache entry out of date.
Expand Down
3 changes: 2 additions & 1 deletion mypy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class Options:
"strict_optional",
}

OPTIONS_AFFECTING_CACHE = (PER_MODULE_OPTIONS | {"quick_and_dirty", "platform"})
OPTIONS_AFFECTING_CACHE = ((PER_MODULE_OPTIONS | {"quick_and_dirty", "platform"})
- {"debug_cache"})

def __init__(self) -> None:
# -- build options --
Expand Down