Skip to content

Commit 56b7668

Browse files
committed
fix get path following oob recursive move
1 parent bea8dd4 commit 56b7668

File tree

2 files changed

+87
-28
lines changed

2 files changed

+87
-28
lines changed

jupyter_server/services/contents/fileidmanager.py

Lines changed: 63 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,25 @@ def _index_dir_recursively(self, dir_path, stat_info):
9292
scan_iter.close()
9393

9494
def _sync_all(self):
95-
"""Syncs Files table with the filesystem and ensures that the correct
96-
path is associated with each file ID. Does so by traversing the file
97-
tree recursively, looking for dirty directories. A dirty directory is a
98-
directory that possibly contains previously indexed files which were
99-
moved into it. An unindexed directory is always dirty, whereas an
100-
indexed directory is dirty only if the `mtime` in the record and the
101-
current `mtime` differ."""
95+
"""
96+
Syncs Files table with the filesystem and ensures that the correct path
97+
is associated with each file ID. Does so by iterating through all
98+
indexed directories and syncing the contents of all dirty directories.
99+
100+
Notes
101+
-----
102+
A dirty directory is a directory that is either:
103+
- unindexed
104+
- indexed but moved
105+
- indexed but with different `mtime`
106+
107+
Dirty directories contain possibly indexed but moved files as children.
108+
Hence we need to call _sync_file() on their contents via _sync_dir().
109+
Indexed directories that are dirty solely because of mtime difference
110+
are included in the below SELECT query. Unindexed or indexed-but-moved
111+
dirty directories are not included in the query, and hence must be
112+
handled in _sync_dir().
113+
"""
102114
cursor = self.con.execute("SELECT id, path, mtime FROM Files WHERE is_dir = 1")
103115
for dir in cursor:
104116
id, path, old_mtime = dir
@@ -115,47 +127,71 @@ def _sync_all(self):
115127
self._update(id, stat_info)
116128

117129
def _sync_dir(self, dir_path):
118-
"""Syncs the contents of a directory. Indexes previously unindexed
119-
directories and recursively syncs their contents."""
130+
"""
131+
Syncs the contents of a directory. If a child directory is dirty because
132+
it is either unindexed or indexed-but-moved, then the contents of that
133+
child directory are synced. See _sync_all() for more on dirty
134+
directories.
135+
"""
120136
with os.scandir(dir_path) as scan_iter:
121137
for entry in scan_iter:
122138
stat_info = self._parse_raw_stat(entry.stat())
123-
id = self._sync_file(entry.path, stat_info)
139+
id, is_dirty_dir = self._sync_file(entry.path, stat_info)
124140

125-
# treat unindexed directories as dirty. create a new record and
126-
# recursive sync dir contents.
127-
if stat_info.is_dir and id is None:
141+
if id is None:
128142
self._create(entry.path, stat_info)
143+
144+
# sync dirty dir contents if it is either unindexed or
145+
# indexed-but-moved
146+
if is_dirty_dir:
129147
self._sync_dir(entry.path)
148+
130149
scan_iter.close()
131150

132151
def _sync_file(self, path, stat_info):
133-
"""Syncs the file at path with the Files table by detecting whether the
152+
"""
153+
Syncs the file at `path` with the Files table by detecting whether the
134154
file was previously indexed but moved. Updates the record with the new
135-
path and returns the file ID if the file was previously indexed. Returns
136-
None otherwise."""
155+
path. This ensures that the file at path is associated with the correct
156+
file ID. This method does nothing if the file at `path` was not
157+
previously indexed.
158+
159+
Returns
160+
-------
161+
Returns a two-tuple containing the elements
162+
163+
id : int, optional
164+
ID of the file if it was previously indexed. None otherwise.
165+
166+
dir_dirty: bool
167+
Whether the file is a dirty directory and should be traversed by
168+
_sync_dir(). Not necessarily true even if the `mtime` differs, since
169+
directories which are dirty only because of `mtime` difference are
170+
included in the query run by _sync_all(). See _sync_all() for more
171+
on dirty directories.
172+
"""
137173
src = self.con.execute(
138-
"SELECT id, crtime, mtime FROM Files WHERE ino = ?", (stat_info.ino,)
174+
"SELECT id, path, crtime, mtime FROM Files WHERE ino = ?", (stat_info.ino,)
139175
).fetchone()
140176

141177
# if no record with matching ino, then return None
142178
if not src:
143-
return None
179+
return None, stat_info.is_dir
144180

145-
id, src_crtime, src_mtime = src
181+
id, old_path, src_crtime, src_mtime = src
146182
src_timestamp = src_crtime if src_crtime is not None else src_mtime
147183
dst_timestamp = stat_info.crtime if stat_info.crtime is not None else stat_info.mtime
148184

149185
# if record has identical ino and crtime/mtime to an existing record,
150186
# update it with new destination path and stat info, returning its id
151187
if src_timestamp == dst_timestamp:
152188
self._update(id, stat_info, path)
153-
return id
189+
return id, stat_info.is_dir and old_path != path
154190

155191
# otherwise delete the existing record with identical `ino`, since inos
156192
# must be unique. then return None
157193
self.con.execute("DELETE FROM Files WHERE id = ?", (id,))
158-
return None
194+
return None, stat_info.is_dir
159195

160196
def _normalize_path(self, path):
161197
"""Normalizes a given file path."""
@@ -187,9 +223,8 @@ def _parse_raw_stat(self, raw_stat, stat_info=None):
187223
return stat_info
188224

189225
def _stat(self, path):
190-
"""Returns stat info on a path in a StatStruct object. Writes to
191-
`stat_info` StatStruct arg if passed. Returns None if file does not
192-
exist at path."""
226+
"""Returns stat info on a path in a StatStruct object.Returns None if
227+
file does not exist at path."""
193228
stat_info = StatStruct()
194229

195230
try:
@@ -200,7 +235,7 @@ def _stat(self, path):
200235
return self._parse_raw_stat(raw_stat, stat_info)
201236

202237
def _create(self, path, stat_info):
203-
"""Creates a record given its stat info and path. Returns the new file
238+
"""Creates a record given its path and stat info. Returns the new file
204239
ID."""
205240
cursor = self.con.execute(
206241
"INSERT INTO Files (path, ino, crtime, mtime, is_dir) VALUES (?, ?, ?, ?, ?)",
@@ -210,7 +245,7 @@ def _create(self, path, stat_info):
210245
return cursor.lastrowid
211246

212247
def _update(self, id, stat_info, path=None):
213-
"""Updates a record given its file ID."""
248+
"""Updates a record given its file ID, stat info, and possibly path."""
214249
if path is not None:
215250
self.con.execute(
216251
"UPDATE Files SET path = ?, ino = ?, crtime = ?, mtime = ? WHERE id = ?",
@@ -236,7 +271,7 @@ def index(self, path, stat_info=None, commit=True):
236271
return None
237272

238273
# sync file at path and return file ID if it exists
239-
id = self._sync_file(path, stat_info)
274+
id, _ = self._sync_file(path, stat_info)
240275
if id is not None:
241276
return id
242277

@@ -256,7 +291,7 @@ def get_id(self, path):
256291
return None
257292

258293
# then sync file at path and retrieve id, if any
259-
id = self._sync_file(path, stat_info)
294+
id, _ = self._sync_file(path, stat_info)
260295
self.con.commit()
261296
return id
262297

tests/services/contents/test_fileidmanager.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,30 @@ def test_get_path_oob_move(fid_manager, old_path, new_path):
141141
assert fid_manager.get_path(id) == new_path
142142

143143

144+
def test_get_path_oob_move_recursive(
145+
fid_manager, old_path, old_path_child, new_path, new_path_child
146+
):
147+
id = fid_manager.index(old_path)
148+
child_id = fid_manager.index(old_path_child)
149+
150+
os.rename(old_path, new_path)
151+
152+
assert fid_manager.get_path(id) == new_path
153+
assert fid_manager.get_path(child_id) == new_path_child
154+
155+
156+
def test_get_path_oob_move_into_unindexed(
157+
fid_manager, old_path, old_path_child, new_path, new_path_child
158+
):
159+
fid_manager.index(old_path)
160+
id = fid_manager.index(old_path_child)
161+
162+
os.mkdir(new_path)
163+
os.rename(old_path_child, new_path_child)
164+
165+
assert fid_manager.get_path(id) == new_path_child
166+
167+
144168
def test_move_unindexed(fid_manager, old_path, new_path):
145169
os.rename(old_path, new_path)
146170
id = fid_manager.move(old_path, new_path)

0 commit comments

Comments
 (0)