Skip to content

Commit c4c2416

Browse files
Gang He authored and torvalds committed
ocfs2: nowait aio support
Return EAGAIN if any of the following checks fail for direct I/O:
- The related locks cannot be acquired immediately.
- Blocks are not allocated at the write location, so the write would trigger block allocation and block I/O operations.

[[email protected]: v4]
Link: http://lkml.kernel.org/r/[email protected]
[[email protected]: v2]
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Gang He <[email protected]>
Reviewed-by: Alex Chen <[email protected]>
Cc: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
Cc: Junxiao Bi <[email protected]>
Cc: Joseph Qi <[email protected]>
Cc: Changwei Ge <[email protected]>
Cc: Jun Piao <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent ac604d3 commit c4c2416

File tree

6 files changed

+104
-33
lines changed

6 files changed

+104
-33
lines changed

fs/ocfs2/dir.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1958,7 +1958,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
19581958

19591959
trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
19601960

1961-
error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level);
1961+
error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
19621962
if (lock_level && error >= 0) {
19631963
/* We release EX lock which used to update atime
19641964
* and get PR lock again to reduce contention

fs/ocfs2/dlmglue.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,13 +2546,18 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
25462546

25472547
int ocfs2_inode_lock_atime(struct inode *inode,
25482548
struct vfsmount *vfsmnt,
2549-
int *level)
2549+
int *level, int wait)
25502550
{
25512551
int ret;
25522552

2553-
ret = ocfs2_inode_lock(inode, NULL, 0);
2553+
if (wait)
2554+
ret = ocfs2_inode_lock(inode, NULL, 0);
2555+
else
2556+
ret = ocfs2_try_inode_lock(inode, NULL, 0);
2557+
25542558
if (ret < 0) {
2555-
mlog_errno(ret);
2559+
if (ret != -EAGAIN)
2560+
mlog_errno(ret);
25562561
return ret;
25572562
}
25582563

@@ -2564,9 +2569,14 @@ int ocfs2_inode_lock_atime(struct inode *inode,
25642569
struct buffer_head *bh = NULL;
25652570

25662571
ocfs2_inode_unlock(inode, 0);
2567-
ret = ocfs2_inode_lock(inode, &bh, 1);
2572+
if (wait)
2573+
ret = ocfs2_inode_lock(inode, &bh, 1);
2574+
else
2575+
ret = ocfs2_try_inode_lock(inode, &bh, 1);
2576+
25682577
if (ret < 0) {
2569-
mlog_errno(ret);
2578+
if (ret != -EAGAIN)
2579+
mlog_errno(ret);
25702580
return ret;
25712581
}
25722582
*level = 1;

fs/ocfs2/dlmglue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write);
146146
void ocfs2_open_unlock(struct inode *inode);
147147
int ocfs2_inode_lock_atime(struct inode *inode,
148148
struct vfsmount *vfsmnt,
149-
int *level);
149+
int *level, int wait);
150150
int ocfs2_inode_lock_full_nested(struct inode *inode,
151151
struct buffer_head **ret_bh,
152152
int ex,

fs/ocfs2/file.c

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
140140
spin_unlock(&oi->ip_lock);
141141
}
142142

143+
file->f_mode |= FMODE_NOWAIT;
144+
143145
leave:
144146
return status;
145147
}
@@ -2132,26 +2134,53 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
21322134
}
21332135

21342136
static int ocfs2_prepare_inode_for_write(struct file *file,
2135-
loff_t pos,
2136-
size_t count)
2137+
loff_t pos, size_t count, int wait)
21372138
{
2138-
int ret = 0, meta_level = 0;
2139+
int ret = 0, meta_level = 0, overwrite_io = 0;
21392140
struct dentry *dentry = file->f_path.dentry;
21402141
struct inode *inode = d_inode(dentry);
2142+
struct buffer_head *di_bh = NULL;
21412143
loff_t end;
21422144

21432145
/*
21442146
* We start with a read level meta lock and only jump to an ex
21452147
* if we need to make modifications here.
21462148
*/
21472149
for(;;) {
2148-
ret = ocfs2_inode_lock(inode, NULL, meta_level);
2150+
if (wait)
2151+
ret = ocfs2_inode_lock(inode, NULL, meta_level);
2152+
else
2153+
ret = ocfs2_try_inode_lock(inode,
2154+
overwrite_io ? NULL : &di_bh, meta_level);
21492155
if (ret < 0) {
21502156
meta_level = -1;
2151-
mlog_errno(ret);
2157+
if (ret != -EAGAIN)
2158+
mlog_errno(ret);
21522159
goto out;
21532160
}
21542161

2162+
/*
2163+
* Check if IO will overwrite allocated blocks in case
2164+
* IOCB_NOWAIT flag is set.
2165+
*/
2166+
if (!wait && !overwrite_io) {
2167+
overwrite_io = 1;
2168+
if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
2169+
ret = -EAGAIN;
2170+
goto out_unlock;
2171+
}
2172+
2173+
ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
2174+
brelse(di_bh);
2175+
di_bh = NULL;
2176+
up_read(&OCFS2_I(inode)->ip_alloc_sem);
2177+
if (ret < 0) {
2178+
if (ret != -EAGAIN)
2179+
mlog_errno(ret);
2180+
goto out_unlock;
2181+
}
2182+
}
2183+
21552184
/* Clear suid / sgid if necessary. We do this here
21562185
* instead of later in the write path because
21572186
* remove_suid() calls ->setattr without any hint that
@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
21992228

22002229
out_unlock:
22012230
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
2202-
pos, count);
2231+
pos, count, wait);
2232+
2233+
brelse(di_bh);
22032234

22042235
if (meta_level >= 0)
22052236
ocfs2_inode_unlock(inode, meta_level);
@@ -2211,7 +2242,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
22112242
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
22122243
struct iov_iter *from)
22132244
{
2214-
int direct_io, rw_level;
2245+
int rw_level;
22152246
ssize_t written = 0;
22162247
ssize_t ret;
22172248
size_t count = iov_iter_count(from);
@@ -2223,19 +2254,26 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
22232254
void *saved_ki_complete = NULL;
22242255
int append_write = ((iocb->ki_pos + count) >=
22252256
i_size_read(inode) ? 1 : 0);
2257+
int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2258+
int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
22262259

22272260
trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
22282261
(unsigned long long)OCFS2_I(inode)->ip_blkno,
22292262
file->f_path.dentry->d_name.len,
22302263
file->f_path.dentry->d_name.name,
22312264
(unsigned int)from->nr_segs); /* GRRRRR */
22322265

2266+
if (!direct_io && nowait)
2267+
return -EOPNOTSUPP;
2268+
22332269
if (count == 0)
22342270
return 0;
22352271

2236-
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2237-
2238-
inode_lock(inode);
2272+
if (nowait) {
2273+
if (!inode_trylock(inode))
2274+
return -EAGAIN;
2275+
} else
2276+
inode_lock(inode);
22392277

22402278
/*
22412279
* Concurrent O_DIRECT writes are allowed with
@@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
22442282
*/
22452283
rw_level = (!direct_io || full_coherency || append_write);
22462284

2247-
ret = ocfs2_rw_lock(inode, rw_level);
2285+
if (nowait)
2286+
ret = ocfs2_try_rw_lock(inode, rw_level);
2287+
else
2288+
ret = ocfs2_rw_lock(inode, rw_level);
22482289
if (ret < 0) {
2249-
mlog_errno(ret);
2290+
if (ret != -EAGAIN)
2291+
mlog_errno(ret);
22502292
goto out_mutex;
22512293
}
22522294

@@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
22602302
* other nodes to drop their caches. Buffered I/O
22612303
* already does this in write_begin().
22622304
*/
2263-
ret = ocfs2_inode_lock(inode, NULL, 1);
2305+
if (nowait)
2306+
ret = ocfs2_try_inode_lock(inode, NULL, 1);
2307+
else
2308+
ret = ocfs2_inode_lock(inode, NULL, 1);
22642309
if (ret < 0) {
2265-
mlog_errno(ret);
2310+
if (ret != -EAGAIN)
2311+
mlog_errno(ret);
22662312
goto out;
22672313
}
22682314

@@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
22772323
}
22782324
count = ret;
22792325

2280-
ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
2326+
ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
22812327
if (ret < 0) {
2282-
mlog_errno(ret);
2328+
if (ret != -EAGAIN)
2329+
mlog_errno(ret);
22832330
goto out;
22842331
}
22852332

@@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
23552402
int ret = 0, rw_level = -1, lock_level = 0;
23562403
struct file *filp = iocb->ki_filp;
23572404
struct inode *inode = file_inode(filp);
2405+
int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2406+
int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
23582407

23592408
trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
23602409
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2369,14 +2418,22 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
23692418
goto bail;
23702419
}
23712420

2421+
if (!direct_io && nowait)
2422+
return -EOPNOTSUPP;
2423+
23722424
/*
23732425
* buffered reads protect themselves in ->readpage(). O_DIRECT reads
23742426
* need locks to protect pending reads from racing with truncate.
23752427
*/
2376-
if (iocb->ki_flags & IOCB_DIRECT) {
2377-
ret = ocfs2_rw_lock(inode, 0);
2428+
if (direct_io) {
2429+
if (nowait)
2430+
ret = ocfs2_try_rw_lock(inode, 0);
2431+
else
2432+
ret = ocfs2_rw_lock(inode, 0);
2433+
23782434
if (ret < 0) {
2379-
mlog_errno(ret);
2435+
if (ret != -EAGAIN)
2436+
mlog_errno(ret);
23802437
goto bail;
23812438
}
23822439
rw_level = 0;
@@ -2393,9 +2450,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
23932450
* like i_size. This allows the checks down below
23942451
* generic_file_aio_read() a chance of actually working.
23952452
*/
2396-
ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
2453+
ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
2454+
!nowait);
23972455
if (ret < 0) {
2398-
mlog_errno(ret);
2456+
if (ret != -EAGAIN)
2457+
mlog_errno(ret);
23992458
goto bail;
24002459
}
24012460
ocfs2_inode_unlock(inode, lock_level);

fs/ocfs2/mmap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
184184
int ret = 0, lock_level = 0;
185185

186186
ret = ocfs2_inode_lock_atime(file_inode(file),
187-
file->f_path.mnt, &lock_level);
187+
file->f_path.mnt, &lock_level, 1);
188188
if (ret < 0) {
189189
mlog_errno(ret);
190190
goto out;

fs/ocfs2/ocfs2_trace.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1449,20 +1449,22 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
14491449

14501450
TRACE_EVENT(ocfs2_prepare_inode_for_write,
14511451
TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
1452-
unsigned long count),
1453-
TP_ARGS(ino, saved_pos, count),
1452+
unsigned long count, int wait),
1453+
TP_ARGS(ino, saved_pos, count, wait),
14541454
TP_STRUCT__entry(
14551455
__field(unsigned long long, ino)
14561456
__field(unsigned long long, saved_pos)
14571457
__field(unsigned long, count)
1458+
__field(int, wait)
14581459
),
14591460
TP_fast_assign(
14601461
__entry->ino = ino;
14611462
__entry->saved_pos = saved_pos;
14621463
__entry->count = count;
1464+
__entry->wait = wait;
14631465
),
1464-
TP_printk("%llu %llu %lu", __entry->ino,
1465-
__entry->saved_pos, __entry->count)
1466+
TP_printk("%llu %llu %lu %d", __entry->ino,
1467+
__entry->saved_pos, __entry->count, __entry->wait)
14661468
);
14671469

14681470
DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);

0 commit comments

Comments
 (0)