Skip to content

Commit 4593cf0

Browse files
Adds LazyReferenceMapper to api.rst (#1378)
1 parent ac290c8 commit 4593cf0

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

docs/source/api.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ Built-in Implementations
124124
fsspec.implementations.local.LocalFileSystem
125125
fsspec.implementations.memory.MemoryFileSystem
126126
fsspec.implementations.reference.ReferenceFileSystem
127+
fsspec.implementations.reference.LazyReferenceMapper
127128
fsspec.implementations.sftp.SFTPFileSystem
128129
fsspec.implementations.smb.SMBFileSystem
129130
fsspec.implementations.tar.TarFileSystem
@@ -181,6 +182,9 @@ Built-in Implementations
181182
.. autoclass:: fsspec.implementations.reference.ReferenceFileSystem
182183
:members: __init__
183184

185+
.. autoclass:: fsspec.implementations.reference.LazyReferenceMapper
186+
:members: __init__
187+
184188
.. autoclass:: fsspec.implementations.sftp.SFTPFileSystem
185189
:members: __init__
186190

fsspec/implementations/reference.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,12 @@ def ravel_multi_index(idx, sizes):
8282

8383

8484
class LazyReferenceMapper(collections.abc.MutableMapping):
85-
"""Interface to read parquet store as if it were a standard kerchunk
86-
references dict."""
85+
"""This interface can be used to read/write references from Parquet stores.
86+
It is not intended for other types of references.
87+
It can be used with Kerchunk's MultiZarrToZarr class to combine
88+
references into a parquet store.
89+
Examples of this use-case can be found here:
90+
https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
8791

8892
# import is class level to prevent numpy dep requirement for fsspec
8993
@property
@@ -108,9 +112,15 @@ def __init__(
108112
Root of parquet store
109113
fs : fsspec.AbstractFileSystem
110114
fsspec filesystem object, default is local filesystem.
111-
cache_size : int
115+
cache_size : int, default=128
112116
Maximum size of LRU cache, where cache_size*record_size denotes
113117
the total number of references that can be loaded in memory at once.
118+
categorical_threshold : int
119+
Encode urls as pandas.Categorical to reduce memory footprint if the ratio
120+
of the total number of refs to the number of unique urls for each variable
121+
is greater than or equal to this number. (default 10)
122+
123+
114124
"""
115125
self.root = root
116126
self.chunk_sizes = {}

0 commit comments

Comments
 (0)