@@ -181,7 +181,6 @@ const struct address_space_operations zonefs_file_aops = {
181
181
.migrate_folio = filemap_migrate_folio ,
182
182
.is_partially_uptodate = iomap_is_partially_uptodate ,
183
183
.error_remove_page = generic_error_remove_page ,
184
- .direct_IO = noop_direct_IO ,
185
184
.swap_activate = zonefs_swap_activate ,
186
185
};
187
186
@@ -342,6 +341,77 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
342
341
return generic_file_llseek_size (file , offset , whence , isize , isize );
343
342
}
344
343
344
+ struct zonefs_zone_append_bio {
345
+ /* The target inode of the BIO */
346
+ struct inode * inode ;
347
+
348
+ /* For sync writes, the target append write offset */
349
+ u64 append_offset ;
350
+
351
+ /*
352
+ * This member must come last, bio_alloc_bioset will allocate enough
353
+ * bytes for the entire struct zonefs_zone_append_bio but relies on bio being last.
354
+ */
355
+ struct bio bio ;
356
+ };
357
+
358
+ static inline struct zonefs_zone_append_bio *
359
+ zonefs_zone_append_bio (struct bio * bio )
360
+ {
361
+ return container_of (bio , struct zonefs_zone_append_bio , bio );
362
+ }
363
+
364
+ static void zonefs_file_zone_append_dio_bio_end_io (struct bio * bio )
365
+ {
366
+ struct zonefs_zone_append_bio * za_bio = zonefs_zone_append_bio (bio );
367
+ struct zonefs_zone * z = zonefs_inode_zone (za_bio -> inode );
368
+ sector_t za_sector ;
369
+
370
+ if (bio -> bi_status != BLK_STS_OK )
371
+ goto bio_end ;
372
+
373
+ /*
374
+ * If the file zone was written underneath the file system, the zone
375
+ * append operation can still succeed (if the zone is not full) but
376
+ * the write append location will not be where we expect it to be.
377
+ * Check that we wrote where we intended to, that is, at z->z_wpoffset.
378
+ */
379
+ za_sector = z -> z_sector + (za_bio -> append_offset >> SECTOR_SHIFT );
380
+ if (bio -> bi_iter .bi_sector != za_sector ) {
381
+ zonefs_warn (za_bio -> inode -> i_sb ,
382
+ "Invalid write sector %llu for zone at %llu\n" ,
383
+ bio -> bi_iter .bi_sector , z -> z_sector );
384
+ bio -> bi_status = BLK_STS_IOERR ;
385
+ }
386
+
387
+ bio_end :
388
+ iomap_dio_bio_end_io (bio );
389
+ }
390
+
391
+ static void zonefs_file_zone_append_dio_submit_io (const struct iomap_iter * iter ,
392
+ struct bio * bio ,
393
+ loff_t file_offset )
394
+ {
395
+ struct zonefs_zone_append_bio * za_bio = zonefs_zone_append_bio (bio );
396
+ struct inode * inode = iter -> inode ;
397
+ struct zonefs_zone * z = zonefs_inode_zone (inode );
398
+
399
+ /*
400
+ * Issue a zone append BIO to process sync dio writes. The append
401
+ * file offset is saved to check the zone append write location
402
+ * on completion of the BIO.
403
+ */
404
+ za_bio -> inode = inode ;
405
+ za_bio -> append_offset = file_offset ;
406
+
407
+ bio -> bi_opf &= ~REQ_OP_WRITE ;
408
+ bio -> bi_opf |= REQ_OP_ZONE_APPEND ;
409
+ bio -> bi_iter .bi_sector = z -> z_sector ;
410
+ bio -> bi_end_io = zonefs_file_zone_append_dio_bio_end_io ;
411
+
412
+ submit_bio (bio );
413
+ }
414
+
345
415
static int zonefs_file_write_dio_end_io (struct kiocb * iocb , ssize_t size ,
346
416
int error , unsigned int flags )
347
417
{
@@ -372,93 +442,17 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
372
442
return 0 ;
373
443
}
374
444
375
- static const struct iomap_dio_ops zonefs_write_dio_ops = {
376
- .end_io = zonefs_file_write_dio_end_io ,
377
- };
445
+ static struct bio_set zonefs_zone_append_bio_set ;
378
446
379
- static ssize_t zonefs_file_dio_append (struct kiocb * iocb , struct iov_iter * from )
380
- {
381
- struct inode * inode = file_inode (iocb -> ki_filp );
382
- struct zonefs_zone * z = zonefs_inode_zone (inode );
383
- struct block_device * bdev = inode -> i_sb -> s_bdev ;
384
- unsigned int max = bdev_max_zone_append_sectors (bdev );
385
- pgoff_t start , end ;
386
- struct bio * bio ;
387
- ssize_t size = 0 ;
388
- int nr_pages ;
389
- ssize_t ret ;
390
-
391
- max = ALIGN_DOWN (max << SECTOR_SHIFT , inode -> i_sb -> s_blocksize );
392
- iov_iter_truncate (from , max );
393
-
394
- /*
395
- * If the inode block size (zone write granularity) is smaller than the
396
- * page size, we may be appending data belonging to the last page of the
397
- * inode straddling inode->i_size, with that page already cached due to
398
- * a buffered read or readahead. So make sure to invalidate that page.
399
- * This will always be a no-op for the case where the block size is
400
- * equal to the page size.
401
- */
402
- start = iocb -> ki_pos >> PAGE_SHIFT ;
403
- end = (iocb -> ki_pos + iov_iter_count (from ) - 1 ) >> PAGE_SHIFT ;
404
- if (invalidate_inode_pages2_range (inode -> i_mapping , start , end ))
405
- return - EBUSY ;
406
-
407
- nr_pages = iov_iter_npages (from , BIO_MAX_VECS );
408
- if (!nr_pages )
409
- return 0 ;
410
-
411
- bio = bio_alloc (bdev , nr_pages ,
412
- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE , GFP_NOFS );
413
- bio -> bi_iter .bi_sector = z -> z_sector ;
414
- bio -> bi_ioprio = iocb -> ki_ioprio ;
415
- if (iocb_is_dsync (iocb ))
416
- bio -> bi_opf |= REQ_FUA ;
417
-
418
- ret = bio_iov_iter_get_pages (bio , from );
419
- if (unlikely (ret ))
420
- goto out_release ;
421
-
422
- size = bio -> bi_iter .bi_size ;
423
- task_io_account_write (size );
424
-
425
- if (iocb -> ki_flags & IOCB_HIPRI )
426
- bio_set_polled (bio , iocb );
427
-
428
- ret = submit_bio_wait (bio );
429
-
430
- /*
431
- * If the file zone was written underneath the file system, the zone
432
- * write pointer may not be where we expect it to be, but the zone
433
- * append write can still succeed. So check manually that we wrote where
434
- * we intended to, that is, at zi->i_wpoffset.
435
- */
436
- if (!ret ) {
437
- sector_t wpsector =
438
- z -> z_sector + (z -> z_wpoffset >> SECTOR_SHIFT );
439
-
440
- if (bio -> bi_iter .bi_sector != wpsector ) {
441
- zonefs_warn (inode -> i_sb ,
442
- "Corrupted write pointer %llu for zone at %llu\n" ,
443
- bio -> bi_iter .bi_sector , z -> z_sector );
444
- ret = - EIO ;
445
- }
446
- }
447
-
448
- zonefs_file_write_dio_end_io (iocb , size , ret , 0 );
449
- trace_zonefs_file_dio_append (inode , size , ret );
450
-
451
- out_release :
452
- bio_release_pages (bio , false);
453
- bio_put (bio );
454
-
455
- if (ret >= 0 ) {
456
- iocb -> ki_pos += size ;
457
- return size ;
458
- }
447
+ static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
448
+ .submit_io = zonefs_file_zone_append_dio_submit_io ,
449
+ .end_io = zonefs_file_write_dio_end_io ,
450
+ .bio_set = & zonefs_zone_append_bio_set ,
451
+ };
459
452
460
- return ret ;
461
- }
453
+ static const struct iomap_dio_ops zonefs_write_dio_ops = {
454
+ .end_io = zonefs_file_write_dio_end_io ,
455
+ };
462
456
463
457
/*
464
458
* Do not exceed the LFS limits nor the file zone size. If pos is under the
@@ -539,6 +533,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
539
533
struct zonefs_inode_info * zi = ZONEFS_I (inode );
540
534
struct zonefs_zone * z = zonefs_inode_zone (inode );
541
535
struct super_block * sb = inode -> i_sb ;
536
+ const struct iomap_dio_ops * dio_ops ;
542
537
bool sync = is_sync_kiocb (iocb );
543
538
bool append = false;
544
539
ssize_t ret , count ;
@@ -582,20 +577,26 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
582
577
}
583
578
584
579
if (append ) {
585
- ret = zonefs_file_dio_append (iocb , from );
580
+ unsigned int max = bdev_max_zone_append_sectors (sb -> s_bdev );
581
+
582
+ max = ALIGN_DOWN (max << SECTOR_SHIFT , sb -> s_blocksize );
583
+ iov_iter_truncate (from , max );
584
+
585
+ dio_ops = & zonefs_zone_append_dio_ops ;
586
586
} else {
587
- /*
588
- * iomap_dio_rw() may return ENOTBLK if there was an issue with
589
- * page invalidation. Overwrite that error code with EBUSY to
590
- * be consistent with zonefs_file_dio_append() return value for
591
- * similar issues.
592
- */
593
- ret = iomap_dio_rw (iocb , from , & zonefs_write_iomap_ops ,
594
- & zonefs_write_dio_ops , 0 , NULL , 0 );
595
- if (ret == - ENOTBLK )
596
- ret = - EBUSY ;
587
+ dio_ops = & zonefs_write_dio_ops ;
597
588
}
598
589
590
+ /*
591
+ * iomap_dio_rw() may return ENOTBLK if there was an issue with
592
+ * page invalidation. Overwrite that error code with EBUSY so that
593
+ * the user can make sense of the error.
594
+ */
595
+ ret = iomap_dio_rw (iocb , from , & zonefs_write_iomap_ops ,
596
+ dio_ops , 0 , NULL , 0 );
597
+ if (ret == - ENOTBLK )
598
+ ret = - EBUSY ;
599
+
599
600
if (zonefs_zone_is_seq (z ) &&
600
601
(ret > 0 || ret == - EIOCBQUEUED )) {
601
602
if (ret > 0 )
@@ -813,6 +814,7 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
813
814
{
814
815
int ret ;
815
816
817
+ file -> f_mode |= FMODE_CAN_ODIRECT ;
816
818
ret = generic_file_open (inode , file );
817
819
if (ret )
818
820
return ret ;
@@ -900,3 +902,15 @@ const struct file_operations zonefs_file_operations = {
900
902
.splice_write = iter_file_splice_write ,
901
903
.iopoll = iocb_bio_iopoll ,
902
904
};
905
+
906
+ int zonefs_file_bioset_init (void )
907
+ {
908
+ return bioset_init (& zonefs_zone_append_bio_set , BIO_POOL_SIZE ,
909
+ offsetof(struct zonefs_zone_append_bio , bio ),
910
+ BIOSET_NEED_BVECS );
911
+ }
912
+
913
+ void zonefs_file_bioset_exit (void )
914
+ {
915
+ bioset_exit (& zonefs_zone_append_bio_set );
916
+ }
0 commit comments