Skip to content

Commit fe245c8

Browse files
committed
afs: Add comments on abort handling
Add some comments on AFS abort code handling in the rotation algorithm and
adjust the errors produced to match.

Reported-by: Jeffrey E Altman <[email protected]>
Signed-off-by: David Howells <[email protected]>
Reviewed-by: Jeffrey Altman <[email protected]>
cc: Marc Dionne <[email protected]>
cc: [email protected]
1 parent bad1a11 commit fe245c8

File tree

1 file changed

+90
-11
lines changed

1 file changed

+90
-11
lines changed

fs/afs/rotate.c

Lines changed: 90 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <linux/sched/signal.h>
1414
#include "internal.h"
1515
#include "afs_fs.h"
16+
#include "protocol_uae.h"
1617

1718
/*
1819
* Begin iteration through a server list, starting with the vnode's last used
@@ -143,13 +144,23 @@ bool afs_select_fileserver(struct afs_operation *op)
143144
case -ECONNABORTED:
144145
/* The far side rejected the operation on some grounds. This
145146
* might involve the server being busy or the volume having been moved.
147+
*
148+
* Note that various V* errors should not be sent to a cache manager
149+
* by a fileserver as they should be translated to more modern UAE*
150+
* errors instead. IBM AFS and OpenAFS fileservers, however, do leak
151+
* these abort codes.
146152
*/
147153
switch (op->ac.abort_code) {
148154
case VNOVOL:
149155
/* This fileserver doesn't know about the volume.
150156
* - May indicate that the VL is wrong - retry once and compare
151157
* the results.
152158
* - May indicate that the fileserver couldn't attach to the vol.
159+
* - The volume might have been temporarily removed so that it can
160+
* be replaced by a volume restore. "vos" might have ended one
161+
* transaction and has yet to create the next.
162+
* - The volume might not be blessed or might not be in-service
163+
* (administrative action).
153164
*/
154165
if (op->flags & AFS_OPERATION_VNOVOL) {
155166
op->error = -EREMOTEIO;
@@ -183,16 +194,56 @@ bool afs_select_fileserver(struct afs_operation *op)
183194
_leave(" = t [vnovol]");
184195
return true;
185196

186-
case VSALVAGE: /* TODO: Should this return an error or iterate? */
187197
case VVOLEXISTS:
188-
case VNOSERVICE:
189198
case VONLINE:
190-
case VDISKFULL:
191-
case VOVERQUOTA:
192-
op->error = afs_abort_to_error(op->ac.abort_code);
199+
/* These should not be returned from the fileserver. */
200+
pr_warn("Fileserver returned unexpected abort %d\n",
201+
op->ac.abort_code);
202+
op->error = -EREMOTEIO;
203+
goto next_server;
204+
205+
case VNOSERVICE:
206+
/* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
207+
* if the volume was neither in-service nor administratively
208+
* blessed. All usage was replaced by VNOVOL because AFS 3.1 and
209+
* earlier cache managers did not handle VNOSERVICE and assumed
210+
* it was the client OS's errno 105.
211+
*
212+
* Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
213+
* fileserver idle dead time error which was sent in place of
214+
* RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
215+
* fileserver took too long to send a reply to the client.
216+
* RX_CALL_TIMEOUT would have caused the cache manager to mark the
217+
* server down whereas VNOSERVICE since AFS 3.2 would cause the cache
218+
* manager to temporarily (up to 15 minutes) mark the volume
219+
* instance as unusable.
220+
*
221+
* The idle dead logic resulted in cache inconsistency since a
222+
* state changing call that the cache manager assumed was dead
223+
* could still be processed to completion by the fileserver. This
224+
* logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
225+
* returned. However, many 1.4.8 through 1.6.24 fileservers are
226+
* still in existence.
227+
*
228+
* AuriStorFS fileservers have never returned VNOSERVICE.
229+
*
230+
* VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
231+
*/
232+
case RX_CALL_TIMEOUT:
233+
op->error = -ETIMEDOUT;
193234
goto next_server;
194235

236+
case VSALVAGING: /* This error should not be leaked to cache managers
237+
* but is from OpenAFS demand attach fileservers.
238+
* It should be treated as an alias for VOFFLINE.
239+
*/
240+
case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
195241
case VOFFLINE:
242+
/* The volume is in use by the volserver or another volume utility
243+
* for an operation that might alter the contents. The volume is
244+
* expected to come back but it might take a long time (could be
245+
* days).
246+
*/
196247
if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
197248
afs_busy(op->volume, op->ac.abort_code);
198249
clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
@@ -207,11 +258,20 @@ bool afs_select_fileserver(struct afs_operation *op)
207258
}
208259
goto busy;
209260

210-
case VSALVAGING:
211-
case VRESTARTING:
261+
case VRESTARTING: /* The fileserver is either shutting down or starting up. */
212262
case VBUSY:
213-
/* Retry after going round all the servers unless we
214-
* have a file lock we need to maintain.
263+
/* The volume is in use by the volserver or another volume
264+
* utility for an operation that is not expected to alter the
265+
* contents of the volume. VBUSY does not need to be returned
266+
* for a ROVOL or BACKVOL bound to an ITBusy volserver
267+
* transaction. The fileserver is permitted to continue serving
268+
* content from ROVOLs and BACKVOLs during an ITBusy transaction
269+
* because the content will not change. However, many fileserver
270+
* releases do return VBUSY for ROVOL and BACKVOL instances under
271+
* many circumstances.
272+
*
273+
* Retry after going round all the servers unless we have a file
274+
* lock we need to maintain.
215275
*/
216276
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
217277
op->error = -EBUSY;
@@ -226,7 +286,7 @@ bool afs_select_fileserver(struct afs_operation *op)
226286
if (!afs_sleep_and_retry(op))
227287
goto failed;
228288

229-
/* Retry with same server & address */
289+
/* Retry with same server & address */
230290
_leave(" = t [vbusy]");
231291
return true;
232292
}
@@ -270,10 +330,29 @@ bool afs_select_fileserver(struct afs_operation *op)
270330

271331
goto restart_from_beginning;
272332

333+
case VDISKFULL:
334+
case UAENOSPC:
335+
/* The partition is full. Only applies to RWVOLs.
336+
* Translate locally and return ENOSPC.
337+
* No replicas to failover to.
338+
*/
339+
op->error = -ENOSPC;
340+
goto failed_but_online;
341+
342+
case VOVERQUOTA:
343+
case UAEDQUOT:
344+
/* Volume is full. Only applies to RWVOLs.
345+
* Translate locally and return EDQUOT.
346+
* No replicas to failover to.
347+
*/
348+
op->error = -EDQUOT;
349+
goto failed_but_online;
350+
273351
default:
352+
op->error = afs_abort_to_error(op->ac.abort_code);
353+
failed_but_online:
274354
clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
275355
clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
276-
op->error = afs_abort_to_error(op->ac.abort_code);
277356
goto failed;
278357
}
279358

0 commit comments

Comments
 (0)