Skip to content

Commit 518e38a

Browse files
biger410 authored and Brian Maly committed
ext4: further refactoring bufferio and dio helper
Continue refactoring the buffered I/O and direct I/O helpers: the direct I/O helper now invokes the buffered I/O helper when a direct write completes only partially. The inode lock is released before falling back to the buffered I/O helper. This prepares for the following patch, which makes direct I/O overwrites use a shared lock.

Orabug: 34405736
Signed-off-by: Junxiao Bi <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
Signed-off-by: Brian Maly <[email protected]>
1 parent 5fa6484 commit 518e38a

File tree

1 file changed

+77
-11
lines changed

1 file changed

+77
-11
lines changed

fs/ext4/file.c

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <linux/pagevec.h>
3030
#include <linux/uio.h>
3131
#include <linux/mman.h>
32+
#include <linux/backing-dev.h>
3233
#include "ext4.h"
3334
#include "ext4_jbd2.h"
3435
#include "xattr.h"
@@ -219,7 +220,8 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
219220
static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
220221
struct iov_iter *from)
221222
{
222-
struct inode *inode = file_inode(iocb->ki_filp);
223+
struct file *file = iocb->ki_filp;
224+
struct inode *inode = file_inode(file);
223225
ssize_t ret;
224226

225227
if (!inode_trylock(inode)) {
@@ -232,25 +234,41 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
232234
if (ret <= 0)
233235
goto out;
234236

235-
ret = __generic_file_write_iter(iocb, from);
236-
inode_unlock(inode);
237+
ret = file_remove_privs(file);
238+
if (ret)
239+
goto out;
237240

238-
if (ret > 0)
239-
ret = generic_write_sync(iocb, ret);
241+
ret = file_update_time(file);
242+
if (ret)
243+
goto out;
240244

241-
return ret;
245+
/* We can write back this queue in page reclaim */
246+
current->backing_dev_info = inode_to_bdi(inode);
247+
ret = generic_perform_write(file, from, iocb->ki_pos);
248+
if (likely(ret > 0))
249+
iocb->ki_pos += ret;
250+
current->backing_dev_info = NULL;
242251

243252
out:
244253
inode_unlock(inode);
254+
255+
if (ret > 0)
256+
ret = generic_write_sync(iocb, ret);
257+
245258
return ret;
246259
}
247260

248261
static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
249262
{
250-
struct inode *inode = file_inode(iocb->ki_filp);
263+
struct file *file = iocb->ki_filp;
264+
struct inode *inode = file_inode(file);
251265
int unaligned_aio = 0;
252266
int overwrite = 0;
253267
ssize_t ret;
268+
struct address_space *mapping = file->f_mapping;
269+
ssize_t status;
270+
loff_t pos, endbyte;
271+
ssize_t err;
254272

255273
if (!inode_trylock(inode)) {
256274
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -260,7 +278,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
260278

261279
ret = ext4_write_checks(iocb, from);
262280
if (ret <= 0)
263-
goto out;
281+
goto err_out;
264282

265283
/*
266284
* Unaligned direct AIO must be serialized among each other as zeroing
@@ -282,11 +300,59 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
282300
overwrite = 1;
283301
} else if (iocb->ki_flags & IOCB_NOWAIT) {
284302
ret = -EAGAIN;
285-
goto out;
303+
goto err_out;
286304
}
287305
}
288306

289-
ret = __generic_file_write_iter(iocb, from);
307+
ret = file_remove_privs(file);
308+
if (ret)
309+
goto err_out;
310+
311+
ret = file_update_time(file);
312+
if (ret)
313+
goto err_out;
314+
315+
ret = generic_file_direct_write(iocb, from);
316+
/*
317+
* If the write stopped short of completing, fall back to
318+
* buffered writes. Some filesystems do this for writes to
319+
* holes, for example. For DAX files, a buffered write will
320+
* not succeed (even if it did, DAX does not handle dirty
321+
* page-cache pages correctly).
322+
*/
323+
if (ret < 0 || !iov_iter_count(from))
324+
goto out;
325+
326+
inode_unlock(inode);
327+
328+
/*direct io partial done, fallen into buffer io. */
329+
pos = iocb->ki_pos;
330+
status = ext4_buffered_write_iter(iocb, from);
331+
if (status < 0)
332+
return ret ? ret : status;
333+
334+
/*
335+
* We need to ensure that the page cache pages are written to
336+
* disk and invalidated to preserve the expected O_DIRECT
337+
* semantics.
338+
*/
339+
endbyte = pos + status - 1;
340+
err = filemap_write_and_wait_range(mapping, pos, endbyte);
341+
if (err == 0) {
342+
iocb->ki_pos = endbyte + 1;
343+
ret += status;
344+
invalidate_mapping_pages(mapping,
345+
pos >> PAGE_SHIFT,
346+
endbyte >> PAGE_SHIFT);
347+
} else {
348+
/*
349+
* We don't know how much we wrote, so just return
350+
* the number of bytes which were direct-written
351+
*/
352+
}
353+
return ret;
354+
355+
out:
290356
/*
291357
* Unaligned direct AIO must be the only IO in flight. Otherwise
292358
* overlapping aligned IO after unaligned might result in data
@@ -301,7 +367,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
301367

302368
return ret;
303369

304-
out:
370+
err_out:
305371
inode_unlock(inode);
306372
return ret;
307373
}

0 commit comments

Comments
 (0)