2 files changed: +17 −3 lines

@@ -124,6 +124,7 @@ Built-in Implementations
    fsspec.implementations.local.LocalFileSystem
    fsspec.implementations.memory.MemoryFileSystem
    fsspec.implementations.reference.ReferenceFileSystem
+   fsspec.implementations.reference.LazyReferenceMapper
    fsspec.implementations.sftp.SFTPFileSystem
    fsspec.implementations.smb.SMBFileSystem
    fsspec.implementations.tar.TarFileSystem
@@ -181,6 +182,9 @@ Built-in Implementations
 .. autoclass:: fsspec.implementations.reference.ReferenceFileSystem
    :members: __init__

+.. autoclass:: fsspec.implementations.reference.LazyReferenceMapper
+   :members: __init__
+
 .. autoclass:: fsspec.implementations.sftp.SFTPFileSystem
    :members: __init__

@@ -82,8 +82,12 @@ def ravel_multi_index(idx, sizes):
 
 
 class LazyReferenceMapper(collections.abc.MutableMapping):
-    """Interface to read parquet store as if it were a standard kerchunk
-    references dict."""
+    """This interface can be used to read/write references from Parquet stores.
+    It is not intended for other types of references.
+    It can be used with Kerchunk's MultiZarrToZarr method to combine
+    references into a parquet store.
+    Examples of this use-case can be found here:
+    https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
 
     # import is class level to prevent numpy dep requirement for fsspec
     @property
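
For context, a minimal read-side sketch of the usage the new docstring describes: opening an existing parquet reference store and treating it as a kerchunk references dict. The store path "combined.parq" and the chunk key are hypothetical.

import fsspec
from fsspec.implementations.reference import LazyReferenceMapper

# Open an existing parquet reference store on the local filesystem.
fs = fsspec.filesystem("file")
refs = LazyReferenceMapper("combined.parq", fs=fs)

# The mapper behaves like a standard kerchunk references dict: keys are
# zarr-style paths, values are inline data or [url, offset, size] references.
print(refs[".zgroup"])            # root zarr group metadata
print(refs["temperature/0.0.0"])  # one chunk reference (hypothetical key)

Because it is a MutableMapping, the object is meant to stand in wherever a references dict is expected; depending on the fsspec version, ReferenceFileSystem's fo argument can also be pointed straight at the parquet directory, which builds a LazyReferenceMapper internally.
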
@@ -108,9 +112,15 @@ def __init__(
             Root of parquet store
         fs : fsspec.AbstractFileSystem
             fsspec filesystem object, default is local filesystem.
-        cache_size : int
+        cache_size : int, default=128
             Maximum size of LRU cache, where cache_size*record_size denotes
             the total number of references that can be loaded in memory at once.
+        categorical_threshold : int
+            Encode urls as pandas.Categorical to reduce memory footprint if the ratio
+            of the number of unique urls to total number of refs for each variable
+            is greater than or equal to this number. (default 10)
+
+
         """
         self.root = root
         self.chunk_sizes = {}
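
A hedged write-side sketch of how these parameters might be used when combining references into a parquet store with Kerchunk's MultiZarrToZarr, following the kerchunk "parquet storage" page linked in the class docstring. The store path, input JSON files, concat dimension and record_size are hypothetical, and LazyReferenceMapper.create is assumed to forward extra keywords (cache_size, categorical_threshold) to __init__.

import fsspec
from fsspec.implementations.reference import LazyReferenceMapper
from kerchunk.combine import MultiZarrToZarr

fs = fsspec.filesystem("file")

# Initialise an empty parquet reference store; record_size controls how many
# references go into each parquet file.
out = LazyReferenceMapper.create(
    root="combined.parq",
    fs=fs,
    record_size=10_000,
    cache_size=128,
    categorical_threshold=10,
)

# Combine per-file kerchunk JSON references directly into the parquet store.
mzz = MultiZarrToZarr(
    ["single1.json", "single2.json"],  # hypothetical per-file references
    remote_protocol="s3",
    concat_dims=["time"],
    out=out,
)
mzz.translate()
out.flush()  # persist any buffered records to the store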