Skip to content

Commit 89626e8

Browse files
authored
Implement cache size for CachingFileSystem (#1377)
1 parent bb60983 commit 89626e8

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

fsspec/implementations/cached.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ def __init__(
128128
self.expiry = expiry_time
129129
self.compression = compression
130130

131+
# Size of cache in bytes. If None then the size is unknown and will be
132+
# recalculated the next time cache_size() is called. On writes to the
133+
# cache this is reset to None.
134+
self._cache_size = None
135+
131136
if same_names is not None and cache_mapper is not None:
132137
raise ValueError(
133138
"Cannot specify both same_names and cache_mapper in "
@@ -165,6 +170,17 @@ def _remove_tempdir(tempdir):
165170
def _mkcache(self):
166171
os.makedirs(self.storage[-1], exist_ok=True)
167172

173+
def cache_size(self):
    """Return the size, in bytes, of the writable cache directory.

    Only the last entry of ``self.storage`` is measured; when several
    cache directories are configured the earlier ones are not included
    in the result.

    The value is memoized in ``self._cache_size`` and is only recomputed
    when that attribute is ``None``.
    """
    # Fast path: a previously computed size is still considered valid.
    if self._cache_size is not None:
        return self._cache_size

    writable_dir = self.storage[-1]
    local_fs = filesystem("file")
    self._cache_size = local_fs.du(writable_dir, withdirs=True)
    return self._cache_size
183+
168184
def load_cache(self):
169185
"""Read set of stored blocks from file"""
170186
self._metadata.load()
@@ -176,6 +192,7 @@ def save_cache(self):
176192
self._mkcache()
177193
self._metadata.save()
178194
self.last_cache = time.time()
195+
self._cache_size = None
179196

180197
def _check_cache(self):
181198
"""Reload caches if time elapsed or any disappeared"""
@@ -202,6 +219,7 @@ def clear_cache(self):
202219
"""
203220
rmtree(self.storage[-1])
204221
self.load_cache()
222+
self._cache_size = None
205223

206224
def clear_expired_cache(self, expiry_time=None):
207225
"""Remove all expired files and metadata from the cache
@@ -231,6 +249,8 @@ def clear_expired_cache(self, expiry_time=None):
231249
rmtree(self.storage[-1])
232250
self.load_cache()
233251

252+
self._cache_size = None
253+
234254
def pop_from_cache(self, path):
235255
"""Remove cached version of given file
236256
@@ -242,6 +262,7 @@ def pop_from_cache(self, path):
242262
fn = self._metadata.pop_file(path)
243263
if fn is not None:
244264
os.remove(fn)
265+
self._cache_size = None
245266

246267
def _open(
247268
self,
@@ -389,6 +410,7 @@ def __getattribute__(self, item):
389410
"__hash__",
390411
"__eq__",
391412
"to_json",
413+
"cache_size",
392414
]:
393415
# all the methods defined in this class. Note `open` here, since
394416
# it calls `_open`, but is actually in superclass
@@ -535,6 +557,7 @@ def commit_many(self, open_files):
535557
os.remove(f.name)
536558
except FileNotFoundError:
537559
pass
560+
self._cache_size = None
538561

539562
def _make_local_details(self, path):
540563
hash = self._mapper(path)
@@ -704,6 +727,7 @@ def _open(self, path, mode="rb", **kwargs):
704727
kwargs["mode"] = mode
705728

706729
self._mkcache()
730+
self._cache_size = None
707731
if self.compression:
708732
with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
709733
if isinstance(f, AbstractBufferedFile):

fsspec/implementations/tests/test_cached.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from fsspec.implementations.cached import CachingFileSystem, LocalTempFile
1818
from fsspec.implementations.local import make_path_posix
19+
from fsspec.tests.conftest import win
1920

2021
from .test_ftp import FTPFileSystem
2122

@@ -1211,3 +1212,41 @@ def test_cache_dir_auto_deleted(temp_cache, tmpdir):
12111212
assert not local.exists(cache_dir)
12121213
else:
12131214
assert local.exists(cache_dir)
1215+
1216+
1217+
@pytest.mark.parametrize("protocol", ["filecache", "blockcache", "simplecache"])
def test_cache_size(tmpdir, protocol):
    """Check that ``cache_size()`` follows the cache through its lifecycle.

    The reported size is sampled at four points: before anything is cached,
    after one file has been read through the caching filesystem, after that
    file has been popped from the cache, and after the cache is cleared.
    """
    if win and protocol == "blockcache":
        pytest.skip("Windows file locking affects blockcache size tests")

    source = os.path.join(tmpdir, "source")
    afile = os.path.join(source, "afile")
    os.mkdir(source)
    # Use a context manager so the source file is closed before it is read
    # back through the caching filesystem.
    with open(afile, "w") as f:
        f.write("test")

    fs = fsspec.filesystem(protocol, target_protocol="file")
    empty_cache_size = fs.cache_size()

    # Create cache
    with fs.open(afile, "rb") as f:
        assert f.read(5) == b"test"
    single_file_cache_size = fs.cache_size()
    assert single_file_cache_size > empty_cache_size

    # Remove cached file but leave cache metadata file
    fs.pop_from_cache(afile)
    if win and protocol == "filecache":
        # Only the lower bound is checked on Windows. This comparison was
        # previously a bare expression, so it verified nothing.
        assert empty_cache_size < fs.cache_size()
    elif protocol != "simplecache":
        assert empty_cache_size < fs.cache_size() < single_file_cache_size
    else:
        # simplecache never stores metadata
        assert fs.cache_size() == single_file_cache_size

    # Completely remove cache
    fs.clear_cache()
    if protocol != "simplecache":
        assert fs.cache_size() == empty_cache_size
    else:
        # Whole cache directory has been deleted
        assert fs.cache_size() == 0

0 commit comments

Comments
 (0)