@@ -17,6 +17,21 @@ class StatStruct:
17
17
18
18
19
19
class FileIdManager (LoggingConfigurable ):
20
+ """
21
+ Manager that supports tracks files across their lifetime by associating
22
+ each with a unique file ID, which is maintained across filesystem operations.
23
+
24
+ Notes
25
+ -----
26
+
27
+ All private helper methods prefixed with an underscore (except `__init__()`)
28
+ do NOT commit their SQL statements in a transaction via `self.con.commit()`.
29
+ This responsibility is delegated to the public method calling them to
30
+ increase performance. Committing multiple SQL transactions in serial is much
31
+ slower than committing a single SQL transaction wrapping all SQL statements
32
+ performed during a method's procedure body.
33
+ """
34
+
20
35
root_dir = Unicode (
21
36
help = ("The root being served by Jupyter server. Must be an absolute path." ), config = True
22
37
)
@@ -48,7 +63,7 @@ def __init__(self, *args, **kwargs):
48
63
"is_dir TINYINT NOT NULL"
49
64
")"
50
65
)
51
- self ._index_all_dirs ( )
66
+ self ._index_dir_recursively ( self . root_dir )
52
67
self .con .execute ("CREATE INDEX IF NOT EXISTS ix_Files_path ON Files (path)" )
53
68
self .con .execute ("CREATE INDEX IF NOT EXISTS ix_Files_ino ON Files (ino)" )
54
69
self .con .execute ("CREATE INDEX IF NOT EXISTS ix_Files_is_dir ON Files (is_dir)" )
@@ -62,11 +77,6 @@ def _validate_abspath_traits(self, proposal):
62
77
raise TraitError ("FileIdManager : %s must be an absolute path" % proposal ["trait" ].name )
63
78
return self ._normalize_path (proposal ["value" ])
64
79
65
- def _index_all_dirs (self ):
66
- """Recursively indexes all directories under the server root."""
67
- self ._index_dir_recursively (self .root_dir )
68
- self .con .commit ()
69
-
70
80
def _index_dir_recursively (self , dir_path ):
71
81
"""Recursively indexes all directories under a given path. Used in
72
82
__init__() to recursively index all directories under the server
@@ -79,14 +89,13 @@ def _index_dir_recursively(self, dir_path):
79
89
scan_iter .close ()
80
90
81
91
def _sync_all (self ):
82
- """Syncs Files table (the index being referred to by this method) with
83
- the filesystem and ensures that the correct path is associated with each
84
- file ID. Does so by traversing the file tree recursively, looking for
85
- dirty directories. A dirty directory is a directory that possibly
86
- contains previously indexed files which were moved into it. An
87
- unindexed directory is always dirty, whereas an indexed directory is
88
- dirty only if the `mtime` in the record and the current `mtime`
89
- differ."""
92
+ """Syncs Files table with the filesystem and ensures that the correct
93
+ path is associated with each file ID. Does so by traversing the file
94
+ tree recursively, looking for dirty directories. A dirty directory is a
95
+ directory that possibly contains previously indexed files which were
96
+ moved into it. An unindexed directory is always dirty, whereas an
97
+ indexed directory is dirty only if the `mtime` in the record and the
98
+ current `mtime` differ."""
90
99
cursor = self .con .execute ("SELECT id, path, mtime FROM Files WHERE is_dir = 1" )
91
100
for dir in cursor :
92
101
id , path , old_mtime = dir
@@ -101,7 +110,6 @@ def _sync_all(self):
101
110
if dir_dirty :
102
111
self ._sync_dir (path )
103
112
self ._update (id , stat_info )
104
- self .con .commit ()
105
113
106
114
def _sync_dir (self , dir_path ):
107
115
"""Syncs the contents of a directory. Indexes previously unindexed
@@ -114,15 +122,15 @@ def _sync_dir(self, dir_path):
114
122
# treat unindexed directories as dirty. create a new record and
115
123
# recursive sync dir contents.
116
124
if stat_info .is_dir and id is None :
117
- # commits transactions in _sync_all()
118
125
self ._create (entry .path , stat_info )
119
126
self ._sync_dir (entry .path )
120
127
scan_iter .close ()
121
128
122
129
def _sync_file (self , path , stat_info ):
123
- """Detects whether the file at path was a previously indexed file moved
124
- out-of-band. Returns the file ID and updates the record with the new
125
- path if an out-of-band move is indicated. Returns None otherwise."""
130
+ """Syncs the file at path with the Files table by detecting whether the
131
+ file was previously indexed but moved. Updates the record with the new
132
+ path and returns the file ID if the file was previously indexed. Returns
133
+ None otherwise."""
126
134
src = self .con .execute (
127
135
"SELECT id, crtime, mtime FROM Files WHERE ino = ?" , (stat_info .ino ,)
128
136
).fetchone ()
@@ -144,7 +152,6 @@ def _sync_file(self, path, stat_info):
144
152
# otherwise delete the existing record with identical `ino`, since inos
145
153
# must be unique. then return None
146
154
self .con .execute ("DELETE FROM Files WHERE id = ?" , (id ,))
147
- self .con .commit ()
148
155
return None
149
156
150
157
def _normalize_path (self , path ):
@@ -198,8 +205,6 @@ def _create(self, path, stat_info):
198
205
(path , stat_info .ino , stat_info .crtime , stat_info .mtime , stat_info .is_dir ),
199
206
)
200
207
201
- self .con .commit ()
202
-
203
208
return cursor .lastrowid
204
209
205
210
def _update (self , id , stat_info , path = None ):
@@ -219,38 +224,38 @@ def _update(self, id, stat_info, path=None):
219
224
"UPDATE Files SET ino = ?, crtime = ?, mtime = ? WHERE id = ?" ,
220
225
(stat_info .ino , stat_info .crtime , stat_info .mtime , id ),
221
226
)
222
- self .con .commit ()
223
227
224
228
def index (self , path ):
225
229
"""Returns the file ID for the file at `path`, creating a new file ID if
226
- one does not exist. Returns None only if file does not exist at path.
227
- Note that this essentially just wraps `get_id()` and creates a new
228
- record if it returns None and the file exists."""
229
- stat_info = StatStruct ()
230
- existing_id = self .get_id (path , stat_info )
231
- if existing_id is not None :
232
- return existing_id
233
- if stat_info .empty :
230
+ one does not exist. Returns None only if file does not exist at path."""
231
+ path = self ._normalize_path (path )
232
+ stat_info = self ._stat (path )
233
+ if not stat_info :
234
234
return None
235
235
236
- # if path does not exist in the DB and an out-of-band move is not indicated, create a new record
237
- path = self ._normalize_path (path )
238
- return self ._create (path , stat_info )
239
-
240
- def get_id (self , path , stat_info = None ):
241
- """Retrieves the file ID associated with a file path. Tracks out-of-band
242
- moves by searching the table for a record with an identical `ino` and
243
- `crtime` (falling back to `mtime` if `crtime` is not supported on the
244
- current platform). Updates the file's stat info on invocation and caches
245
- this in the `stat_info` arg if passed. Returns None if the file has not
246
- yet been indexed or does not exist at the given path."""
236
+ # sync file at path and return file ID if it exists
237
+ id = self ._sync_file (path , stat_info )
238
+ if id is not None :
239
+ return id
240
+
241
+ # otherwise, create a new record and return the file ID
242
+ id = self ._create (path , stat_info )
243
+ self .con .commit ()
244
+ return id
245
+
246
+ def get_id (self , path ):
247
+ """Retrieves the file ID associated with a file path. Returns None if
248
+ the file has not yet been indexed or does not exist at the given
249
+ path."""
247
250
path = self ._normalize_path (path )
248
- stat_info = self ._stat (path , stat_info )
251
+ stat_info = self ._stat (path )
249
252
if not stat_info :
250
253
return None
251
254
252
- # then check if file at path was indexed but moved out-of-band
253
- return self ._sync_file (path , stat_info )
255
+ # then sync file at path and retrieve id, if any
256
+ id = self ._sync_file (path , stat_info )
257
+ self .con .commit ()
258
+ return id
254
259
255
260
def get_path (self , id ):
256
261
"""Retrieves the file path associated with a file ID. Returns None if
@@ -290,20 +295,21 @@ def move(self, old_path, new_path, recursive=False):
290
295
"UPDATE Files SET path = ?, mtime = ? WHERE id = ?" ,
291
296
(new_recpath , rec_stat_info .mtime , id ),
292
297
)
293
- self .con .commit ()
294
298
295
299
# attempt to fetch ID associated with old path
296
300
# we avoid using get_id() here since that will always return None as file no longer exists at old path
297
301
row = self .con .execute ("SELECT id FROM Files WHERE path = ?" , (old_path ,)).fetchone ()
298
302
if row is None :
299
303
# if no existing record, create a new one
300
- # TODO: pass stat info to avoid useless syscall
301
- return self .index (new_path )
304
+ id = self ._create (new_path , stat_info )
305
+ self .con .commit ()
306
+ return id
302
307
else :
303
308
# update existing record with new path and stat info
304
309
# TODO: make sure is_dir for existing record matches that of file at new_path
305
310
id = row [0 ]
306
311
self ._update (id , stat_info , new_path )
312
+ self .con .commit ()
307
313
return id
308
314
309
315
def copy (self , from_path , to_path , recursive = False ):
@@ -339,8 +345,8 @@ def copy(self, from_path, to_path, recursive=False):
339
345
stat_info .is_dir ,
340
346
),
341
347
)
342
- self .con .commit ()
343
348
349
+ # transaction committed in index()
344
350
self .index (from_path )
345
351
return self .index (to_path )
346
352
@@ -353,7 +359,6 @@ def delete(self, path, recursive=False):
353
359
if recursive :
354
360
path_glob = os .path .join (path , "*" )
355
361
self .con .execute ("DELETE FROM Files WHERE path GLOB ?" , (path_glob ,))
356
- self .con .commit ()
357
362
358
363
self .con .execute ("DELETE FROM Files WHERE path = ?" , (path ,))
359
364
self .con .commit ()
0 commit comments