@@ -92,13 +92,25 @@ def _index_dir_recursively(self, dir_path, stat_info):
92
92
scan_iter .close ()
93
93
94
94
def _sync_all (self ):
95
- """Syncs Files table with the filesystem and ensures that the correct
96
- path is associated with each file ID. Does so by traversing the file
97
- tree recursively, looking for dirty directories. A dirty directory is a
98
- directory that possibly contains previously indexed files which were
99
- moved into it. An unindexed directory is always dirty, whereas an
100
- indexed directory is dirty only if the `mtime` in the record and the
101
- current `mtime` differ."""
95
+ """
96
+ Syncs Files table with the filesystem and ensures that the correct path
97
+ is associated with each file ID. Does so by iterating through all
98
+ indexed directories and syncing the contents of all dirty directories.
99
+
100
+ Notes
101
+ -----
102
+ A dirty directory is a directory that is either:
103
+ - unindexed
104
+ - indexed but moved
105
+ - indexed but with different `mtime`
106
+
107
+ Dirty directories contain possibly indexed but moved files as children.
108
+ Hence we need to call _sync_file() on their contents via _sync_dir().
109
+ Indexed directories that are dirty solely because of mtime difference
110
+ are included in the below SELECT query. Unindexed or indexed-but-moved
111
+ dirty directories are not included in the query, and hence must be
112
+ handled in _sync_dir().
113
+ """
102
114
cursor = self .con .execute ("SELECT id, path, mtime FROM Files WHERE is_dir = 1" )
103
115
for dir in cursor :
104
116
id , path , old_mtime = dir
@@ -115,47 +127,71 @@ def _sync_all(self):
115
127
self ._update (id , stat_info )
116
128
117
129
def _sync_dir (self , dir_path ):
118
- """Syncs the contents of a directory. Indexes previously unindexed
119
- directories and recursively syncs their contents."""
130
+ """
131
+ Syncs the contents of a directory. If a child directory is dirty because
132
+ it is either unindexed or indexed-but-moved, then the contents of that
133
+ child directory are synced. See _sync_all() for more on dirty
134
+ directories.
135
+ """
120
136
with os .scandir (dir_path ) as scan_iter :
121
137
for entry in scan_iter :
122
138
stat_info = self ._parse_raw_stat (entry .stat ())
123
- id = self ._sync_file (entry .path , stat_info )
139
+ id , is_dirty_dir = self ._sync_file (entry .path , stat_info )
124
140
125
- # treat unindexed directories as dirty. create a new record and
126
- # recursive sync dir contents.
127
- if stat_info .is_dir and id is None :
141
+ if id is None :
128
142
self ._create (entry .path , stat_info )
143
+
144
+ # sync dirty dir contents if it is either unindexed or
145
+ # indexed-but-moved
146
+ if is_dirty_dir :
129
147
self ._sync_dir (entry .path )
148
+
130
149
scan_iter .close ()
131
150
132
151
def _sync_file (self , path , stat_info ):
133
- """Syncs the file at path with the Files table by detecting whether the
152
+ """
153
+ Syncs the file at `path` with the Files table by detecting whether the
134
154
file was previously indexed but moved. Updates the record with the new
135
- path and returns the file ID if the file was previously indexed. Returns
136
- None otherwise."""
155
+ path. This ensures that the file at path is associated with the correct
156
+ file ID. This method does nothing if the file at `path` was not
157
+ previously indexed.
158
+
159
+ Returns
160
+ -------
161
+ Returns a two-tuple containing the elements
162
+
163
+ id : int, optional
164
+ ID of the file if it was previously indexed. None otherwise.
165
+
166
+ dir_dirty: bool
167
+ Whether the file is a dirty directory and should be traversed by
168
+ _sync_dir(). Not necessarily true even if the `mtime` differs, since
169
+ directories which are dirty only because of `mtime` difference are
170
+ included in the query run by _sync_all(). See _sync_all() for more
171
+ on dirty directories.
172
+ """
137
173
src = self .con .execute (
138
- "SELECT id, crtime, mtime FROM Files WHERE ino = ?" , (stat_info .ino ,)
174
+ "SELECT id, path, crtime, mtime FROM Files WHERE ino = ?" , (stat_info .ino ,)
139
175
).fetchone ()
140
176
141
177
# if no record with matching ino, then return None
142
178
if not src :
143
- return None
179
+ return None , stat_info . is_dir
144
180
145
- id , src_crtime , src_mtime = src
181
+ id , old_path , src_crtime , src_mtime = src
146
182
src_timestamp = src_crtime if src_crtime is not None else src_mtime
147
183
dst_timestamp = stat_info .crtime if stat_info .crtime is not None else stat_info .mtime
148
184
149
185
# if record has identical ino and crtime/mtime to an existing record,
150
186
# update it with new destination path and stat info, returning its id
151
187
if src_timestamp == dst_timestamp :
152
188
self ._update (id , stat_info , path )
153
- return id
189
+ return id , stat_info . is_dir and old_path != path
154
190
155
191
# otherwise delete the existing record with identical `ino`, since inos
156
192
# must be unique. then return None
157
193
self .con .execute ("DELETE FROM Files WHERE id = ?" , (id ,))
158
- return None
194
+ return None , stat_info . is_dir
159
195
160
196
def _normalize_path (self , path ):
161
197
"""Normalizes a given file path."""
@@ -187,9 +223,8 @@ def _parse_raw_stat(self, raw_stat, stat_info=None):
187
223
return stat_info
188
224
189
225
def _stat (self , path ):
190
- """Returns stat info on a path in a StatStruct object. Writes to
191
- `stat_info` StatStruct arg if passed. Returns None if file does not
192
- exist at path."""
226
+ """Returns stat info on a path in a StatStruct object.Returns None if
227
+ file does not exist at path."""
193
228
stat_info = StatStruct ()
194
229
195
230
try :
@@ -200,7 +235,7 @@ def _stat(self, path):
200
235
return self ._parse_raw_stat (raw_stat , stat_info )
201
236
202
237
def _create (self , path , stat_info ):
203
- """Creates a record given its stat info and path . Returns the new file
238
+ """Creates a record given its path and stat info . Returns the new file
204
239
ID."""
205
240
cursor = self .con .execute (
206
241
"INSERT INTO Files (path, ino, crtime, mtime, is_dir) VALUES (?, ?, ?, ?, ?)" ,
@@ -210,7 +245,7 @@ def _create(self, path, stat_info):
210
245
return cursor .lastrowid
211
246
212
247
def _update (self , id , stat_info , path = None ):
213
- """Updates a record given its file ID."""
248
+ """Updates a record given its file ID, stat info, and possibly path ."""
214
249
if path is not None :
215
250
self .con .execute (
216
251
"UPDATE Files SET path = ?, ino = ?, crtime = ?, mtime = ? WHERE id = ?" ,
@@ -236,7 +271,7 @@ def index(self, path, stat_info=None, commit=True):
236
271
return None
237
272
238
273
# sync file at path and return file ID if it exists
239
- id = self ._sync_file (path , stat_info )
274
+ id , _ = self ._sync_file (path , stat_info )
240
275
if id is not None :
241
276
return id
242
277
@@ -256,7 +291,7 @@ def get_id(self, path):
256
291
return None
257
292
258
293
# then sync file at path and retrieve id, if any
259
- id = self ._sync_file (path , stat_info )
294
+ id , _ = self ._sync_file (path , stat_info )
260
295
self .con .commit ()
261
296
return id
262
297
0 commit comments