Skip to content

Commit e940efa

Browse files
committed
Merge tag 'zonefs-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs
Pull zonefs updates from Damien Le Moal:
 - Modify the synchronous direct write path to use iomap instead of manually coding the issuing of zone append write BIOs (me)
 - Use the FMODE_CAN_ODIRECT file flag to indicate support for direct IO instead of using the old way with noop direct_IO methods (Christoph)
* tag 'zonefs-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs:
   zonefs: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method
   zonefs: use iomap for synchronous direct writes
2 parents 098c5dd + 8812387 commit e940efa

File tree

3 files changed

+121
-98
lines changed

3 files changed

+121
-98
lines changed

fs/zonefs/file.c

Lines changed: 111 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ const struct address_space_operations zonefs_file_aops = {
181181
.migrate_folio = filemap_migrate_folio,
182182
.is_partially_uptodate = iomap_is_partially_uptodate,
183183
.error_remove_page = generic_error_remove_page,
184-
.direct_IO = noop_direct_IO,
185184
.swap_activate = zonefs_swap_activate,
186185
};
187186

@@ -342,6 +341,77 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
342341
return generic_file_llseek_size(file, offset, whence, isize, isize);
343342
}
344343

344+
struct zonefs_zone_append_bio {
345+
/* The target inode of the BIO */
346+
struct inode *inode;
347+
348+
/* For sync writes, the target append write offset */
349+
u64 append_offset;
350+
351+
/*
352+
* This member must come last, bio_alloc_bioset will allocate enough
353+
* bytes for the entire struct zonefs_zone_append_bio but relies on bio being last.
354+
*/
355+
struct bio bio;
356+
};
357+
358+
static inline struct zonefs_zone_append_bio *
359+
zonefs_zone_append_bio(struct bio *bio)
360+
{
361+
return container_of(bio, struct zonefs_zone_append_bio, bio);
362+
}
363+
364+
static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio)
365+
{
366+
struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
367+
struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode);
368+
sector_t za_sector;
369+
370+
if (bio->bi_status != BLK_STS_OK)
371+
goto bio_end;
372+
373+
/*
374+
* If the file zone was written underneath the file system, the zone
375+
* append operation can still succeed (if the zone is not full) but
376+
* the write append location will not be where we expect it to be.
377+
* Check that we wrote where we intended to, that is, at z->z_wpoffset.
378+
*/
379+
za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT);
380+
if (bio->bi_iter.bi_sector != za_sector) {
381+
zonefs_warn(za_bio->inode->i_sb,
382+
"Invalid write sector %llu for zone at %llu\n",
383+
bio->bi_iter.bi_sector, z->z_sector);
384+
bio->bi_status = BLK_STS_IOERR;
385+
}
386+
387+
bio_end:
388+
iomap_dio_bio_end_io(bio);
389+
}
390+
391+
static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter,
392+
struct bio *bio,
393+
loff_t file_offset)
394+
{
395+
struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
396+
struct inode *inode = iter->inode;
397+
struct zonefs_zone *z = zonefs_inode_zone(inode);
398+
399+
/*
400+
* Issue a zone append BIO to process sync dio writes. The append
401+
* file offset is saved to check the zone append write location
402+
* on completion of the BIO.
403+
*/
404+
za_bio->inode = inode;
405+
za_bio->append_offset = file_offset;
406+
407+
bio->bi_opf &= ~REQ_OP_WRITE;
408+
bio->bi_opf |= REQ_OP_ZONE_APPEND;
409+
bio->bi_iter.bi_sector = z->z_sector;
410+
bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io;
411+
412+
submit_bio(bio);
413+
}
414+
345415
static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
346416
int error, unsigned int flags)
347417
{
@@ -372,93 +442,17 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
372442
return 0;
373443
}
374444

375-
static const struct iomap_dio_ops zonefs_write_dio_ops = {
376-
.end_io = zonefs_file_write_dio_end_io,
377-
};
445+
static struct bio_set zonefs_zone_append_bio_set;
378446

379-
static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
380-
{
381-
struct inode *inode = file_inode(iocb->ki_filp);
382-
struct zonefs_zone *z = zonefs_inode_zone(inode);
383-
struct block_device *bdev = inode->i_sb->s_bdev;
384-
unsigned int max = bdev_max_zone_append_sectors(bdev);
385-
pgoff_t start, end;
386-
struct bio *bio;
387-
ssize_t size = 0;
388-
int nr_pages;
389-
ssize_t ret;
390-
391-
max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
392-
iov_iter_truncate(from, max);
393-
394-
/*
395-
* If the inode block size (zone write granularity) is smaller than the
396-
* page size, we may be appending data belonging to the last page of the
397-
* inode straddling inode->i_size, with that page already cached due to
398-
* a buffered read or readahead. So make sure to invalidate that page.
399-
* This will always be a no-op for the case where the block size is
400-
* equal to the page size.
401-
*/
402-
start = iocb->ki_pos >> PAGE_SHIFT;
403-
end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT;
404-
if (invalidate_inode_pages2_range(inode->i_mapping, start, end))
405-
return -EBUSY;
406-
407-
nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
408-
if (!nr_pages)
409-
return 0;
410-
411-
bio = bio_alloc(bdev, nr_pages,
412-
REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
413-
bio->bi_iter.bi_sector = z->z_sector;
414-
bio->bi_ioprio = iocb->ki_ioprio;
415-
if (iocb_is_dsync(iocb))
416-
bio->bi_opf |= REQ_FUA;
417-
418-
ret = bio_iov_iter_get_pages(bio, from);
419-
if (unlikely(ret))
420-
goto out_release;
421-
422-
size = bio->bi_iter.bi_size;
423-
task_io_account_write(size);
424-
425-
if (iocb->ki_flags & IOCB_HIPRI)
426-
bio_set_polled(bio, iocb);
427-
428-
ret = submit_bio_wait(bio);
429-
430-
/*
431-
* If the file zone was written underneath the file system, the zone
432-
* write pointer may not be where we expect it to be, but the zone
433-
* append write can still succeed. So check manually that we wrote where
434-
* we intended to, that is, at zi->i_wpoffset.
435-
*/
436-
if (!ret) {
437-
sector_t wpsector =
438-
z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT);
439-
440-
if (bio->bi_iter.bi_sector != wpsector) {
441-
zonefs_warn(inode->i_sb,
442-
"Corrupted write pointer %llu for zone at %llu\n",
443-
bio->bi_iter.bi_sector, z->z_sector);
444-
ret = -EIO;
445-
}
446-
}
447-
448-
zonefs_file_write_dio_end_io(iocb, size, ret, 0);
449-
trace_zonefs_file_dio_append(inode, size, ret);
450-
451-
out_release:
452-
bio_release_pages(bio, false);
453-
bio_put(bio);
454-
455-
if (ret >= 0) {
456-
iocb->ki_pos += size;
457-
return size;
458-
}
447+
static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
448+
.submit_io = zonefs_file_zone_append_dio_submit_io,
449+
.end_io = zonefs_file_write_dio_end_io,
450+
.bio_set = &zonefs_zone_append_bio_set,
451+
};
459452

460-
return ret;
461-
}
453+
static const struct iomap_dio_ops zonefs_write_dio_ops = {
454+
.end_io = zonefs_file_write_dio_end_io,
455+
};
462456

463457
/*
464458
* Do not exceed the LFS limits nor the file zone size. If pos is under the
@@ -539,6 +533,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
539533
struct zonefs_inode_info *zi = ZONEFS_I(inode);
540534
struct zonefs_zone *z = zonefs_inode_zone(inode);
541535
struct super_block *sb = inode->i_sb;
536+
const struct iomap_dio_ops *dio_ops;
542537
bool sync = is_sync_kiocb(iocb);
543538
bool append = false;
544539
ssize_t ret, count;
@@ -582,20 +577,26 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
582577
}
583578

584579
if (append) {
585-
ret = zonefs_file_dio_append(iocb, from);
580+
unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev);
581+
582+
max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize);
583+
iov_iter_truncate(from, max);
584+
585+
dio_ops = &zonefs_zone_append_dio_ops;
586586
} else {
587-
/*
588-
* iomap_dio_rw() may return ENOTBLK if there was an issue with
589-
* page invalidation. Overwrite that error code with EBUSY to
590-
* be consistent with zonefs_file_dio_append() return value for
591-
* similar issues.
592-
*/
593-
ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
594-
&zonefs_write_dio_ops, 0, NULL, 0);
595-
if (ret == -ENOTBLK)
596-
ret = -EBUSY;
587+
dio_ops = &zonefs_write_dio_ops;
597588
}
598589

590+
/*
591+
* iomap_dio_rw() may return ENOTBLK if there was an issue with
592+
* page invalidation. Overwrite that error code with EBUSY so that
593+
* the user can make sense of the error.
594+
*/
595+
ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
596+
dio_ops, 0, NULL, 0);
597+
if (ret == -ENOTBLK)
598+
ret = -EBUSY;
599+
599600
if (zonefs_zone_is_seq(z) &&
600601
(ret > 0 || ret == -EIOCBQUEUED)) {
601602
if (ret > 0)
@@ -813,6 +814,7 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
813814
{
814815
int ret;
815816

817+
file->f_mode |= FMODE_CAN_ODIRECT;
816818
ret = generic_file_open(inode, file);
817819
if (ret)
818820
return ret;
@@ -900,3 +902,15 @@ const struct file_operations zonefs_file_operations = {
900902
.splice_write = iter_file_splice_write,
901903
.iopoll = iocb_bio_iopoll,
902904
};
905+
906+
int zonefs_file_bioset_init(void)
907+
{
908+
return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE,
909+
offsetof(struct zonefs_zone_append_bio, bio),
910+
BIOSET_NEED_BVECS);
911+
}
912+
913+
void zonefs_file_bioset_exit(void)
914+
{
915+
bioset_exit(&zonefs_zone_append_bio_set);
916+
}

fs/zonefs/super.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1412,10 +1412,14 @@ static int __init zonefs_init(void)
14121412

14131413
BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
14141414

1415-
ret = zonefs_init_inodecache();
1415+
ret = zonefs_file_bioset_init();
14161416
if (ret)
14171417
return ret;
14181418

1419+
ret = zonefs_init_inodecache();
1420+
if (ret)
1421+
goto destroy_bioset;
1422+
14191423
ret = zonefs_sysfs_init();
14201424
if (ret)
14211425
goto destroy_inodecache;
@@ -1430,6 +1434,8 @@ static int __init zonefs_init(void)
14301434
zonefs_sysfs_exit();
14311435
destroy_inodecache:
14321436
zonefs_destroy_inodecache();
1437+
destroy_bioset:
1438+
zonefs_file_bioset_exit();
14331439

14341440
return ret;
14351441
}
@@ -1439,6 +1445,7 @@ static void __exit zonefs_exit(void)
14391445
unregister_filesystem(&zonefs_type);
14401446
zonefs_sysfs_exit();
14411447
zonefs_destroy_inodecache();
1448+
zonefs_file_bioset_exit();
14421449
}
14431450

14441451
MODULE_AUTHOR("Damien Le Moal");

fs/zonefs/zonefs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,8 @@ extern const struct file_operations zonefs_dir_operations;
279279
extern const struct address_space_operations zonefs_file_aops;
280280
extern const struct file_operations zonefs_file_operations;
281281
int zonefs_file_truncate(struct inode *inode, loff_t isize);
282+
int zonefs_file_bioset_init(void);
283+
void zonefs_file_bioset_exit(void);
282284

283285
/* In sysfs.c */
284286
int zonefs_sysfs_register(struct super_block *sb);

0 commit comments

Comments
 (0)