@@ -3295,7 +3295,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3295
3295
}
3296
3296
3297
3297
/*
3298
- * For ext4 extent files, ext4 will do direct-io write to holes,
3298
+ * Handling of direct IO writes.
3299
+ *
3300
+ * For ext4 extent files, ext4 will do direct-io write even to holes,
3299
3301
* preallocated extents, and writes that extend the file, with no need to
3300
3302
* fall back to buffered IO.
3301
3303
*
@@ -3313,21 +3315,37 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3313
3315
* if the machine crashes during the write.
3314
3316
*
3315
3317
*/
3316
- static ssize_t ext4_ext_direct_IO (struct kiocb * iocb , struct iov_iter * iter ,
3317
- loff_t offset )
3318
+ static ssize_t ext4_direct_IO_write (struct kiocb * iocb , struct iov_iter * iter ,
3319
+ loff_t offset )
3318
3320
{
3319
3321
struct file * file = iocb -> ki_filp ;
3320
3322
struct inode * inode = file -> f_mapping -> host ;
3323
+ struct ext4_inode_info * ei = EXT4_I (inode );
3321
3324
ssize_t ret ;
3322
3325
size_t count = iov_iter_count (iter );
3323
3326
int overwrite = 0 ;
3324
3327
get_block_t * get_block_func = NULL ;
3325
3328
int dio_flags = 0 ;
3326
3329
loff_t final_size = offset + count ;
3330
+ int orphan = 0 ;
3331
+ handle_t * handle ;
3327
3332
3328
- /* Use the old path for reads and writes beyond i_size. */
3329
- if (iov_iter_rw (iter ) != WRITE || final_size > inode -> i_size )
3330
- return ext4_ind_direct_IO (iocb , iter , offset );
3333
+ if (final_size > inode -> i_size ) {
3334
+ /* Credits for sb + inode write */
3335
+ handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
3336
+ if (IS_ERR (handle )) {
3337
+ ret = PTR_ERR (handle );
3338
+ goto out ;
3339
+ }
3340
+ ret = ext4_orphan_add (handle , inode );
3341
+ if (ret ) {
3342
+ ext4_journal_stop (handle );
3343
+ goto out ;
3344
+ }
3345
+ orphan = 1 ;
3346
+ ei -> i_disksize = inode -> i_size ;
3347
+ ext4_journal_stop (handle );
3348
+ }
3331
3349
3332
3350
BUG_ON (iocb -> private == NULL );
3333
3351
@@ -3336,8 +3354,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3336
3354
* conversion. This also disallows race between truncate() and
3337
3355
* overwrite DIO as i_dio_count needs to be incremented under i_mutex.
3338
3356
*/
3339
- if (iov_iter_rw (iter ) == WRITE )
3340
- inode_dio_begin (inode );
3357
+ inode_dio_begin (inode );
3341
3358
3342
3359
/* If we do an overwrite dio, i_mutex locking can be released */
3343
3360
overwrite = * ((int * )iocb -> private );
@@ -3346,7 +3363,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3346
3363
inode_unlock (inode );
3347
3364
3348
3365
/*
3349
- * We could direct write to holes and fallocate.
3366
+ * For extent mapped files we could direct write to holes and fallocate.
3350
3367
*
3351
3368
* Allocated blocks to fill the hole are marked as unwritten to prevent
3352
3369
* parallel buffered read to expose the stale data before DIO complete
@@ -3368,7 +3385,11 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3368
3385
iocb -> private = NULL ;
3369
3386
if (overwrite )
3370
3387
get_block_func = ext4_dio_get_block_overwrite ;
3371
- else if (is_sync_kiocb (iocb )) {
3388
+ else if (!ext4_test_inode_flag (inode , EXT4_INODE_EXTENTS ) ||
3389
+ round_down (offset , 1 << inode -> i_blkbits ) >= inode -> i_size ) {
3390
+ get_block_func = ext4_dio_get_block ;
3391
+ dio_flags = DIO_LOCKING | DIO_SKIP_HOLES ;
3392
+ } else if (is_sync_kiocb (iocb )) {
3372
3393
get_block_func = ext4_dio_get_block_unwritten_sync ;
3373
3394
dio_flags = DIO_LOCKING ;
3374
3395
} else {
@@ -3378,10 +3399,11 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3378
3399
#ifdef CONFIG_EXT4_FS_ENCRYPTION
3379
3400
BUG_ON (ext4_encrypted_inode (inode ) && S_ISREG (inode -> i_mode ));
3380
3401
#endif
3381
- if (IS_DAX (inode ))
3402
+ if (IS_DAX (inode )) {
3403
+ dio_flags &= ~DIO_SKIP_HOLES ;
3382
3404
ret = dax_do_io (iocb , inode , iter , offset , get_block_func ,
3383
3405
ext4_end_io_dio , dio_flags );
3384
- else
3406
+ } else
3385
3407
ret = __blockdev_direct_IO (iocb , inode ,
3386
3408
inode -> i_sb -> s_bdev , iter , offset ,
3387
3409
get_block_func ,
@@ -3401,12 +3423,87 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3401
3423
ext4_clear_inode_state (inode , EXT4_STATE_DIO_UNWRITTEN );
3402
3424
}
3403
3425
3404
- if (iov_iter_rw (iter ) == WRITE )
3405
- inode_dio_end (inode );
3426
+ inode_dio_end (inode );
3406
3427
/* take i_mutex locking again if we do an overwrite dio */
3407
3428
if (overwrite )
3408
3429
inode_lock (inode );
3409
3430
3431
+ if (ret < 0 && final_size > inode -> i_size )
3432
+ ext4_truncate_failed_write (inode );
3433
+
3434
+ /* Handle extending of i_size after direct IO write */
3435
+ if (orphan ) {
3436
+ int err ;
3437
+
3438
+ /* Credits for sb + inode write */
3439
+ handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
3440
+ if (IS_ERR (handle )) {
3441
+ /* This is really bad luck. We've written the data
3442
+ * but cannot extend i_size. Bail out and pretend
3443
+ * the write failed... */
3444
+ ret = PTR_ERR (handle );
3445
+ if (inode -> i_nlink )
3446
+ ext4_orphan_del (NULL , inode );
3447
+
3448
+ goto out ;
3449
+ }
3450
+ if (inode -> i_nlink )
3451
+ ext4_orphan_del (handle , inode );
3452
+ if (ret > 0 ) {
3453
+ loff_t end = offset + ret ;
3454
+ if (end > inode -> i_size ) {
3455
+ ei -> i_disksize = end ;
3456
+ i_size_write (inode , end );
3457
+ /*
3458
+ * We're going to return a positive `ret'
3459
+ * here due to non-zero-length I/O, so there's
3460
+ * no way of reporting error returns from
3461
+ * ext4_mark_inode_dirty() to userspace. So
3462
+ * ignore it.
3463
+ */
3464
+ ext4_mark_inode_dirty (handle , inode );
3465
+ }
3466
+ }
3467
+ err = ext4_journal_stop (handle );
3468
+ if (ret == 0 )
3469
+ ret = err ;
3470
+ }
3471
+ out :
3472
+ return ret ;
3473
+ }
3474
+
3475
+ static ssize_t ext4_direct_IO_read (struct kiocb * iocb , struct iov_iter * iter ,
3476
+ loff_t offset )
3477
+ {
3478
+ int unlocked = 0 ;
3479
+ struct inode * inode = iocb -> ki_filp -> f_mapping -> host ;
3480
+ ssize_t ret ;
3481
+
3482
+ if (ext4_should_dioread_nolock (inode )) {
3483
+ /*
3484
+ * Nolock dioread optimization may be dynamically disabled
3485
+ * via ext4_inode_block_unlocked_dio(). Check inode's state
3486
+ * while holding extra i_dio_count ref.
3487
+ */
3488
+ inode_dio_begin (inode );
3489
+ smp_mb ();
3490
+ if (unlikely (ext4_test_inode_state (inode ,
3491
+ EXT4_STATE_DIOREAD_LOCK )))
3492
+ inode_dio_end (inode );
3493
+ else
3494
+ unlocked = 1 ;
3495
+ }
3496
+ if (IS_DAX (inode )) {
3497
+ ret = dax_do_io (iocb , inode , iter , offset , ext4_dio_get_block ,
3498
+ NULL , unlocked ? 0 : DIO_LOCKING );
3499
+ } else {
3500
+ ret = __blockdev_direct_IO (iocb , inode , inode -> i_sb -> s_bdev ,
3501
+ iter , offset , ext4_dio_get_block ,
3502
+ NULL , NULL ,
3503
+ unlocked ? 0 : DIO_LOCKING );
3504
+ }
3505
+ if (unlocked )
3506
+ inode_dio_end (inode );
3410
3507
return ret ;
3411
3508
}
3412
3509
@@ -3434,10 +3531,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3434
3531
return 0 ;
3435
3532
3436
3533
trace_ext4_direct_IO_enter (inode , offset , count , iov_iter_rw (iter ));
3437
- if (ext4_test_inode_flag ( inode , EXT4_INODE_EXTENTS ) )
3438
- ret = ext4_ext_direct_IO (iocb , iter , offset );
3534
+ if (iov_iter_rw ( iter ) == READ )
3535
+ ret = ext4_direct_IO_read (iocb , iter , offset );
3439
3536
else
3440
- ret = ext4_ind_direct_IO (iocb , iter , offset );
3537
+ ret = ext4_direct_IO_write (iocb , iter , offset );
3441
3538
trace_ext4_direct_IO_exit (inode , offset , count , iov_iter_rw (iter ), ret );
3442
3539
return ret ;
3443
3540
}
0 commit comments