Skip to content

Commit 26544c6

Browse files
jtlayton authored and idryomov committed
ceph: when seeing write errors on an inode, switch to sync writes
Currently, we don't have a real feedback mechanism in place for when we start seeing buffered writeback errors. If writeback is failing, there is nothing that prevents an application from continuing to dirty pages that aren't being cleaned. In the event that we're seeing write errors of any sort occur on an inode, have the callback set a flag to force further writes to be synchronous. When the next write succeeds, clear the flag to allow buffered writeback to continue. Since this is just a hint to the write submission mechanism, we only take the i_ceph_lock when a lockless check shows that the flag needs to be changed. Signed-off-by: Jeff Layton <[email protected]> Reviewed-by: "Yan, Zheng" <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 6fc1fe5 commit 26544c6

File tree

3 files changed

+49
-14
lines changed

3 files changed

+49
-14
lines changed

fs/ceph/addr.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
670670
bool remove_page;
671671

672672
dout("writepages_finish %p rc %d\n", inode, rc);
673-
if (rc < 0)
673+
if (rc < 0) {
674674
mapping_set_error(mapping, rc);
675+
ceph_set_error_write(ci);
676+
} else {
677+
ceph_clear_error_write(ci);
678+
}
675679

676680
/*
677681
* We lost the cache cap, need to truncate the page before

fs/ceph/file.c

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,19 +1089,22 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
10891089

10901090
out:
10911091
ceph_osdc_put_request(req);
1092-
if (ret == 0) {
1093-
pos += len;
1094-
written += len;
1095-
1096-
if (pos > i_size_read(inode)) {
1097-
check_caps = ceph_inode_set_size(inode, pos);
1098-
if (check_caps)
1099-
ceph_check_caps(ceph_inode(inode),
1100-
CHECK_CAPS_AUTHONLY,
1101-
NULL);
1102-
}
1103-
} else
1092+
if (ret != 0) {
1093+
ceph_set_error_write(ci);
11041094
break;
1095+
}
1096+
1097+
ceph_clear_error_write(ci);
1098+
pos += len;
1099+
written += len;
1100+
if (pos > i_size_read(inode)) {
1101+
check_caps = ceph_inode_set_size(inode, pos);
1102+
if (check_caps)
1103+
ceph_check_caps(ceph_inode(inode),
1104+
CHECK_CAPS_AUTHONLY,
1105+
NULL);
1106+
}
1107+
11051108
}
11061109

11071110
if (ret != -EOLDSNAPC && written > 0) {
@@ -1307,6 +1310,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
13071310
}
13081311

13091312
retry_snap:
1313+
/* FIXME: not complete since it doesn't account for being at quota */
13101314
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
13111315
err = -ENOSPC;
13121316
goto out;
@@ -1328,7 +1332,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
13281332
inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
13291333

13301334
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
1331-
(iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
1335+
(iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) ||
1336+
(ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
13321337
struct ceph_snap_context *snapc;
13331338
struct iov_iter data;
13341339
inode_unlock(inode);

fs/ceph/super.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,32 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
474474
#define CEPH_I_CAP_DROPPED (1 << 8) /* caps were forcibly dropped */
475475
#define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */
476476
#define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */
477+
#define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */
478+
479+
/*
480+
* We set the ERROR_WRITE bit when we start seeing write errors on an inode
481+
* and then clear it when they start succeeding. Note that we do a lockless
482+
* check first, and only take the lock if it looks like it needs to be changed.
483+
* The write submission code just takes this as a hint, so we're not too
484+
* worried if a few slip through in either direction.
485+
*/
486+
/*
 * ceph_set_error_write - record that a write on this inode has failed.
 * @ci: ceph inode whose i_ceph_flags should carry CEPH_I_ERROR_WRITE.
 *
 * Sets CEPH_I_ERROR_WRITE in ci->i_ceph_flags. The flag is first sampled
 * without the lock; i_ceph_lock is only taken when that sample shows the
 * bit is not yet set, so repeated failures do not keep hitting the lock.
 */
static inline void ceph_set_error_write(struct ceph_inode_info *ci)
487+
{
488+
/* Unlocked peek: skip the spinlock entirely if the bit is already set. */
if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
489+
spin_lock(&ci->i_ceph_lock);
490+
/* The read-modify-write of the flag word is done under i_ceph_lock. */
ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
491+
spin_unlock(&ci->i_ceph_lock);
492+
}
493+
}
494+
495+
static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
496+
{
497+
if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
498+
spin_lock(&ci->i_ceph_lock);
499+
ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
500+
spin_unlock(&ci->i_ceph_lock);
501+
}
502+
}
477503

478504
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
479505
long long release_count,

0 commit comments

Comments
 (0)