Skip to content

Commit 695de46

Browse files
committed
minimize transactions by restricting usage to public methods in FIM
1 parent 4e60209 commit 695de46

File tree

1 file changed

+55
-50
lines changed

1 file changed

+55
-50
lines changed

jupyter_server/services/contents/fileidmanager.py

Lines changed: 55 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ class StatStruct:
1717

1818

1919
class FileIdManager(LoggingConfigurable):
20+
"""
21+
Manager that supports tracks files across their lifetime by associating
22+
each with a unique file ID, which is maintained across filesystem operations.
23+
24+
Notes
25+
-----
26+
27+
All private helper methods prefixed with an underscore (except `__init__()`)
28+
do NOT commit their SQL statements in a transaction via `self.con.commit()`.
29+
This responsibility is delegated to the public method calling them to
30+
increase performance. Committing multiple SQL transactions in serial is much
31+
slower than committing a single SQL transaction wrapping all SQL statements
32+
performed during a method's procedure body.
33+
"""
34+
2035
root_dir = Unicode(
2136
help=("The root being served by Jupyter server. Must be an absolute path."), config=True
2237
)
@@ -48,7 +63,7 @@ def __init__(self, *args, **kwargs):
4863
"is_dir TINYINT NOT NULL"
4964
")"
5065
)
51-
self._index_all_dirs()
66+
self._index_dir_recursively(self.root_dir)
5267
self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_path ON Files (path)")
5368
self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_ino ON Files (ino)")
5469
self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_is_dir ON Files (is_dir)")
@@ -62,11 +77,6 @@ def _validate_abspath_traits(self, proposal):
6277
raise TraitError("FileIdManager : %s must be an absolute path" % proposal["trait"].name)
6378
return self._normalize_path(proposal["value"])
6479

65-
def _index_all_dirs(self):
66-
"""Recursively indexes all directories under the server root."""
67-
self._index_dir_recursively(self.root_dir)
68-
self.con.commit()
69-
7080
def _index_dir_recursively(self, dir_path):
7181
"""Recursively indexes all directories under a given path. Used in
7282
__init__() to recursively index all directories under the server
@@ -79,14 +89,13 @@ def _index_dir_recursively(self, dir_path):
7989
scan_iter.close()
8090

8191
def _sync_all(self):
82-
"""Syncs Files table (the index being referred to by this method) with
83-
the filesystem and ensures that the correct path is associated with each
84-
file ID. Does so by traversing the file tree recursively, looking for
85-
dirty directories. A dirty directory is a directory that possibly
86-
contains previously indexed files which were moved into it. An
87-
unindexed directory is always dirty, whereas an indexed directory is
88-
dirty only if the `mtime` in the record and the current `mtime`
89-
differ."""
92+
"""Syncs Files table with the filesystem and ensures that the correct
93+
path is associated with each file ID. Does so by traversing the file
94+
tree recursively, looking for dirty directories. A dirty directory is a
95+
directory that possibly contains previously indexed files which were
96+
moved into it. An unindexed directory is always dirty, whereas an
97+
indexed directory is dirty only if the `mtime` in the record and the
98+
current `mtime` differ."""
9099
cursor = self.con.execute("SELECT id, path, mtime FROM Files WHERE is_dir = 1")
91100
for dir in cursor:
92101
id, path, old_mtime = dir
@@ -101,7 +110,6 @@ def _sync_all(self):
101110
if dir_dirty:
102111
self._sync_dir(path)
103112
self._update(id, stat_info)
104-
self.con.commit()
105113

106114
def _sync_dir(self, dir_path):
107115
"""Syncs the contents of a directory. Indexes previously unindexed
@@ -114,15 +122,15 @@ def _sync_dir(self, dir_path):
114122
# treat unindexed directories as dirty. create a new record and
115123
# recursive sync dir contents.
116124
if stat_info.is_dir and id is None:
117-
# commits transactions in _sync_all()
118125
self._create(entry.path, stat_info)
119126
self._sync_dir(entry.path)
120127
scan_iter.close()
121128

122129
def _sync_file(self, path, stat_info):
123-
"""Detects whether the file at path was a previously indexed file moved
124-
out-of-band. Returns the file ID and updates the record with the new
125-
path if an out-of-band move is indicated. Returns None otherwise."""
130+
"""Syncs the file at path with the Files table by detecting whether the
131+
file was previously indexed but moved. Updates the record with the new
132+
path and returns the file ID if the file was previously indexed. Returns
133+
None otherwise."""
126134
src = self.con.execute(
127135
"SELECT id, crtime, mtime FROM Files WHERE ino = ?", (stat_info.ino,)
128136
).fetchone()
@@ -144,7 +152,6 @@ def _sync_file(self, path, stat_info):
144152
# otherwise delete the existing record with identical `ino`, since inos
145153
# must be unique. then return None
146154
self.con.execute("DELETE FROM Files WHERE id = ?", (id,))
147-
self.con.commit()
148155
return None
149156

150157
def _normalize_path(self, path):
@@ -198,8 +205,6 @@ def _create(self, path, stat_info):
198205
(path, stat_info.ino, stat_info.crtime, stat_info.mtime, stat_info.is_dir),
199206
)
200207

201-
self.con.commit()
202-
203208
return cursor.lastrowid
204209

205210
def _update(self, id, stat_info, path=None):
@@ -219,38 +224,38 @@ def _update(self, id, stat_info, path=None):
219224
"UPDATE Files SET ino = ?, crtime = ?, mtime = ? WHERE id = ?",
220225
(stat_info.ino, stat_info.crtime, stat_info.mtime, id),
221226
)
222-
self.con.commit()
223227

224228
def index(self, path):
225229
"""Returns the file ID for the file at `path`, creating a new file ID if
226-
one does not exist. Returns None only if file does not exist at path.
227-
Note that this essentially just wraps `get_id()` and creates a new
228-
record if it returns None and the file exists."""
229-
stat_info = StatStruct()
230-
existing_id = self.get_id(path, stat_info)
231-
if existing_id is not None:
232-
return existing_id
233-
if stat_info.empty:
230+
one does not exist. Returns None only if file does not exist at path."""
231+
path = self._normalize_path(path)
232+
stat_info = self._stat(path)
233+
if not stat_info:
234234
return None
235235

236-
# if path does not exist in the DB and an out-of-band move is not indicated, create a new record
237-
path = self._normalize_path(path)
238-
return self._create(path, stat_info)
239-
240-
def get_id(self, path, stat_info=None):
241-
"""Retrieves the file ID associated with a file path. Tracks out-of-band
242-
moves by searching the table for a record with an identical `ino` and
243-
`crtime` (falling back to `mtime` if `crtime` is not supported on the
244-
current platform). Updates the file's stat info on invocation and caches
245-
this in the `stat_info` arg if passed. Returns None if the file has not
246-
yet been indexed or does not exist at the given path."""
236+
# sync file at path and return file ID if it exists
237+
id = self._sync_file(path, stat_info)
238+
if id is not None:
239+
return id
240+
241+
# otherwise, create a new record and return the file ID
242+
id = self._create(path, stat_info)
243+
self.con.commit()
244+
return id
245+
246+
def get_id(self, path):
247+
"""Retrieves the file ID associated with a file path. Returns None if
248+
the file has not yet been indexed or does not exist at the given
249+
path."""
247250
path = self._normalize_path(path)
248-
stat_info = self._stat(path, stat_info)
251+
stat_info = self._stat(path)
249252
if not stat_info:
250253
return None
251254

252-
# then check if file at path was indexed but moved out-of-band
253-
return self._sync_file(path, stat_info)
255+
# then sync file at path and retrieve id, if any
256+
id = self._sync_file(path, stat_info)
257+
self.con.commit()
258+
return id
254259

255260
def get_path(self, id):
256261
"""Retrieves the file path associated with a file ID. Returns None if
@@ -290,20 +295,21 @@ def move(self, old_path, new_path, recursive=False):
290295
"UPDATE Files SET path = ?, mtime = ? WHERE id = ?",
291296
(new_recpath, rec_stat_info.mtime, id),
292297
)
293-
self.con.commit()
294298

295299
# attempt to fetch ID associated with old path
296300
# we avoid using get_id() here since that will always return None as file no longer exists at old path
297301
row = self.con.execute("SELECT id FROM Files WHERE path = ?", (old_path,)).fetchone()
298302
if row is None:
299303
# if no existing record, create a new one
300-
# TODO: pass stat info to avoid useless syscall
301-
return self.index(new_path)
304+
id = self._create(new_path, stat_info)
305+
self.con.commit()
306+
return id
302307
else:
303308
# update existing record with new path and stat info
304309
# TODO: make sure is_dir for existing record matches that of file at new_path
305310
id = row[0]
306311
self._update(id, stat_info, new_path)
312+
self.con.commit()
307313
return id
308314

309315
def copy(self, from_path, to_path, recursive=False):
@@ -339,8 +345,8 @@ def copy(self, from_path, to_path, recursive=False):
339345
stat_info.is_dir,
340346
),
341347
)
342-
self.con.commit()
343348

349+
# transaction committed in index()
344350
self.index(from_path)
345351
return self.index(to_path)
346352

@@ -353,7 +359,6 @@ def delete(self, path, recursive=False):
353359
if recursive:
354360
path_glob = os.path.join(path, "*")
355361
self.con.execute("DELETE FROM Files WHERE path GLOB ?", (path_glob,))
356-
self.con.commit()
357362

358363
self.con.execute("DELETE FROM Files WHERE path = ?", (path,))
359364
self.con.commit()

0 commit comments

Comments
 (0)