Skip to content

Commit 0e01df1

Browse files
committed
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:

"Fix a number of bugs, most notably a potential stale data exposure after a crash and a potential BUG_ON crash if a file has the data journalling flag enabled while it has dirty delayed allocation blocks that haven't been written yet. Also fix a potential crash in the new project quota code and an oops on a maliciously corrupted file system.

In addition, fix some DAX-specific bugs, including the handling of a transient ENOSPC situation and races between writes via direct I/O and an mmap'ed segment that could lead to lost I/O.

Finally, the usual set of miscellaneous cleanups."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (23 commits)
  ext4: pre-zero allocated blocks for DAX IO
  ext4: refactor direct IO code
  ext4: fix race in transient ENOSPC detection
  ext4: handle transient ENOSPC properly for DAX
  dax: call get_blocks() with create == 1 for write faults to unwritten extents
  ext4: remove unmeetable inconsisteny check from ext4_find_extent()
  jbd2: remove excess descriptions for handle_s
  ext4: remove unnecessary bio get/put
  ext4: silence UBSAN in ext4_mb_init()
  ext4: address UBSAN warning in mb_find_order_for_block()
  ext4: fix oops on corrupted filesystem
  ext4: fix check of dqget() return value in ext4_ioctl_setproject()
  ext4: clean up error handling when orphan list is corrupted
  ext4: fix hang when processing corrupted orphaned inode list
  ext4: remove trailing \n from ext4_warning/ext4_error calls
  ext4: fix races between changing inode journal mode and ext4_writepages
  ext4: handle unwritten or delalloc buffers before enabling data journaling
  ext4: fix jbd2 handle extension in ext4_ext_truncate_extend_restart()
  ext4: do not ask jbd2 to write data for delalloc buffers
  jbd2: add support for avoiding data writes during transaction commits
  ...
2 parents a56f489 + 12735f8 commit 0e01df1

28 files changed

+364
-313
lines changed

fs/compat.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,8 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
936936
}
937937
dirent = buf->previous;
938938
if (dirent) {
939+
if (signal_pending(current))
940+
return -EINTR;
939941
if (__put_user(offset, &dirent->d_off))
940942
goto efault;
941943
}
@@ -1020,6 +1022,8 @@ static int compat_filldir64(struct dir_context *ctx, const char *name,
10201022
dirent = buf->previous;
10211023

10221024
if (dirent) {
1025+
if (signal_pending(current))
1026+
return -EINTR;
10231027
if (__put_user_unaligned(offset, &dirent->d_off))
10241028
goto efault;
10251029
}

fs/dax.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
676676
if (error)
677677
goto unlock_page;
678678

679-
if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
679+
if (!buffer_mapped(&bh) && !vmf->cow_page) {
680680
if (vmf->flags & FAULT_FLAG_WRITE) {
681681
error = get_block(inode, block, &bh, 1);
682682
count_vm_event(PGMAJFAULT);

fs/ext4/balloc.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
610610

611611
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
612612

613-
return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
613+
jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
614+
return 1;
614615
}
615616

616617
/*

fs/ext4/dir.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
150150
while (ctx->pos < inode->i_size) {
151151
struct ext4_map_blocks map;
152152

153+
if (fatal_signal_pending(current)) {
154+
err = -ERESTARTSYS;
155+
goto errout;
156+
}
157+
cond_resched();
153158
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
154159
map.m_len = 1;
155160
err = ext4_map_blocks(NULL, inode, &map, 0);

fs/ext4/ext4.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <linux/ratelimit.h>
3434
#include <crypto/hash.h>
3535
#include <linux/falloc.h>
36+
#include <linux/percpu-rwsem.h>
3637
#ifdef __KERNEL__
3738
#include <linux/compat.h>
3839
#endif
@@ -581,6 +582,9 @@ enum {
581582
#define EXT4_GET_BLOCKS_ZERO 0x0200
582583
#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
583584
EXT4_GET_BLOCKS_ZERO)
585+
/* Caller will submit data before dropping transaction handle. This
586+
* allows jbd2 to avoid submitting data before commit. */
587+
#define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400
584588

585589
/*
586590
* The bit position of these flags must not overlap with any of the
@@ -1505,6 +1509,9 @@ struct ext4_sb_info {
15051509
struct ratelimit_state s_err_ratelimit_state;
15061510
struct ratelimit_state s_warning_ratelimit_state;
15071511
struct ratelimit_state s_msg_ratelimit_state;
1512+
1513+
/* Barrier between changing inodes' journal flags and writepages ops. */
1514+
struct percpu_rw_semaphore s_journal_flag_rwsem;
15081515
};
15091516

15101517
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1549,7 +1556,6 @@ enum {
15491556
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
15501557
nolocking */
15511558
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
1552-
EXT4_STATE_ORDERED_MODE, /* data=ordered mode */
15531559
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
15541560
};
15551561

@@ -2521,8 +2527,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
25212527
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
25222528
int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
25232529
struct buffer_head *bh_result, int create);
2524-
int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
2525-
struct buffer_head *bh_result, int create);
2530+
int ext4_dax_get_block(struct inode *inode, sector_t iblock,
2531+
struct buffer_head *bh_result, int create);
25262532
int ext4_get_block(struct inode *inode, sector_t iblock,
25272533
struct buffer_head *bh_result, int create);
25282534
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
@@ -2581,7 +2587,6 @@ extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
25812587
/* indirect.c */
25822588
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
25832589
struct ext4_map_blocks *map, int flags);
2584-
extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
25852590
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
25862591
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
25872592
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
@@ -3329,6 +3334,13 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
33293334
}
33303335
}
33313336

3337+
static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len)
3338+
{
3339+
int blksize = 1 << inode->i_blkbits;
3340+
3341+
return IS_ALIGNED(off, blksize) && IS_ALIGNED(len, blksize);
3342+
}
3343+
33323344
#endif /* __KERNEL__ */
33333345

33343346
#define EFSBADCRC EBADMSG /* Bad CRC detected */

fs/ext4/ext4_jbd2.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,21 @@ static inline int ext4_journal_force_commit(journal_t *journal)
359359
return 0;
360360
}
361361

362-
static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
362+
static inline int ext4_jbd2_inode_add_write(handle_t *handle,
363+
struct inode *inode)
363364
{
364365
if (ext4_handle_valid(handle))
365-
return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
366+
return jbd2_journal_inode_add_write(handle,
367+
EXT4_I(inode)->jinode);
368+
return 0;
369+
}
370+
371+
static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
372+
struct inode *inode)
373+
{
374+
if (ext4_handle_valid(handle))
375+
return jbd2_journal_inode_add_wait(handle,
376+
EXT4_I(inode)->jinode);
366377
return 0;
367378
}
368379

fs/ext4/extents.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,14 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,
120120

121121
if (!ext4_handle_valid(handle))
122122
return 0;
123-
if (handle->h_buffer_credits > needed)
123+
if (handle->h_buffer_credits >= needed)
124124
return 0;
125-
err = ext4_journal_extend(handle, needed);
125+
/*
126+
* If we need to extend the journal get a few extra blocks
127+
* while we're at it for efficiency's sake.
128+
*/
129+
needed += 3;
130+
err = ext4_journal_extend(handle, needed - handle->h_buffer_credits);
126131
if (err <= 0)
127132
return err;
128133
err = ext4_truncate_restart_trans(handle, inode, needed);
@@ -907,13 +912,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
907912

908913
eh = ext_block_hdr(bh);
909914
ppos++;
910-
if (unlikely(ppos > depth)) {
911-
put_bh(bh);
912-
EXT4_ERROR_INODE(inode,
913-
"ppos %d > depth %d", ppos, depth);
914-
ret = -EFSCORRUPTED;
915-
goto err;
916-
}
917915
path[ppos].p_bh = bh;
918916
path[ppos].p_hdr = eh;
919917
}
@@ -2583,7 +2581,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
25832581
}
25842582
} else
25852583
ext4_error(sbi->s_sb, "strange request: removal(2) "
2586-
"%u-%u from %u:%u\n",
2584+
"%u-%u from %u:%u",
25872585
from, to, le32_to_cpu(ex->ee_block), ee_len);
25882586
return 0;
25892587
}
@@ -3738,7 +3736,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
37383736
if (ee_block != map->m_lblk || ee_len > map->m_len) {
37393737
#ifdef EXT4_DEBUG
37403738
ext4_warning("Inode (%ld) finished: extent logical block %llu,"
3741-
" len %u; IO logical block %llu, len %u\n",
3739+
" len %u; IO logical block %llu, len %u",
37423740
inode->i_ino, (unsigned long long)ee_block, ee_len,
37433741
(unsigned long long)map->m_lblk, map->m_len);
37443742
#endif

fs/ext4/extents_status.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
707707
(status & EXTENT_STATUS_WRITTEN)) {
708708
ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
709709
" delayed and written which can potentially "
710-
" cause data loss.\n", lblk, len);
710+
" cause data loss.", lblk, len);
711711
WARN_ON(1);
712712
}
713713

fs/ext4/file.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
202202
if (IS_ERR(handle))
203203
result = VM_FAULT_SIGBUS;
204204
else
205-
result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
205+
result = __dax_fault(vma, vmf, ext4_dax_get_block, NULL);
206206

207207
if (write) {
208208
if (!IS_ERR(handle))
@@ -238,7 +238,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
238238
result = VM_FAULT_SIGBUS;
239239
else
240240
result = __dax_pmd_fault(vma, addr, pmd, flags,
241-
ext4_dax_mmap_get_block, NULL);
241+
ext4_dax_get_block, NULL);
242242

243243
if (write) {
244244
if (!IS_ERR(handle))
@@ -373,7 +373,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
373373
if (ext4_encrypted_inode(d_inode(dir)) &&
374374
!ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) {
375375
ext4_warning(inode->i_sb,
376-
"Inconsistent encryption contexts: %lu/%lu\n",
376+
"Inconsistent encryption contexts: %lu/%lu",
377377
(unsigned long) d_inode(dir)->i_ino,
378378
(unsigned long) inode->i_ino);
379379
dput(dir);

fs/ext4/ialloc.c

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,25 +1150,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
11501150
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
11511151
ext4_group_t block_group;
11521152
int bit;
1153-
struct buffer_head *bitmap_bh;
1153+
struct buffer_head *bitmap_bh = NULL;
11541154
struct inode *inode = NULL;
1155-
long err = -EIO;
1155+
int err = -EFSCORRUPTED;
11561156

1157-
/* Error cases - e2fsck has already cleaned up for us */
1158-
if (ino > max_ino) {
1159-
ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
1160-
err = -EFSCORRUPTED;
1161-
goto error;
1162-
}
1157+
if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
1158+
goto bad_orphan;
11631159

11641160
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
11651161
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
11661162
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
11671163
if (IS_ERR(bitmap_bh)) {
1168-
err = PTR_ERR(bitmap_bh);
1169-
ext4_warning(sb, "inode bitmap error %ld for orphan %lu",
1170-
ino, err);
1171-
goto error;
1164+
ext4_error(sb, "inode bitmap error %ld for orphan %lu",
1165+
ino, PTR_ERR(bitmap_bh));
1166+
return (struct inode *) bitmap_bh;
11721167
}
11731168

11741169
/* Having the inode bit set should be a 100% indicator that this
@@ -1179,45 +1174,47 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
11791174
goto bad_orphan;
11801175

11811176
inode = ext4_iget(sb, ino);
1182-
if (IS_ERR(inode))
1183-
goto iget_failed;
1177+
if (IS_ERR(inode)) {
1178+
err = PTR_ERR(inode);
1179+
ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
1180+
ino, err);
1181+
return inode;
1182+
}
11841183

11851184
/*
1186-
* If the orphans has i_nlinks > 0 then it should be able to be
1187-
* truncated, otherwise it won't be removed from the orphan list
1188-
* during processing and an infinite loop will result.
1185+
* If the orphans has i_nlinks > 0 then it should be able to
1186+
* be truncated, otherwise it won't be removed from the orphan
1187+
* list during processing and an infinite loop will result.
1188+
* Similarly, it must not be a bad inode.
11891189
*/
1190-
if (inode->i_nlink && !ext4_can_truncate(inode))
1190+
if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
1191+
is_bad_inode(inode))
11911192
goto bad_orphan;
11921193

11931194
if (NEXT_ORPHAN(inode) > max_ino)
11941195
goto bad_orphan;
11951196
brelse(bitmap_bh);
11961197
return inode;
11971198

1198-
iget_failed:
1199-
err = PTR_ERR(inode);
1200-
inode = NULL;
12011199
bad_orphan:
1202-
ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1203-
printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1204-
bit, (unsigned long long)bitmap_bh->b_blocknr,
1205-
ext4_test_bit(bit, bitmap_bh->b_data));
1206-
printk(KERN_WARNING "inode=%p\n", inode);
1200+
ext4_error(sb, "bad orphan inode %lu", ino);
1201+
if (bitmap_bh)
1202+
printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1203+
bit, (unsigned long long)bitmap_bh->b_blocknr,
1204+
ext4_test_bit(bit, bitmap_bh->b_data));
12071205
if (inode) {
1208-
printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
1206+
printk(KERN_ERR "is_bad_inode(inode)=%d\n",
12091207
is_bad_inode(inode));
1210-
printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
1208+
printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
12111209
NEXT_ORPHAN(inode));
1212-
printk(KERN_WARNING "max_ino=%lu\n", max_ino);
1213-
printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
1210+
printk(KERN_ERR "max_ino=%lu\n", max_ino);
1211+
printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
12141212
/* Avoid freeing blocks if we got a bad deleted inode */
12151213
if (inode->i_nlink == 0)
12161214
inode->i_blocks = 0;
12171215
iput(inode);
12181216
}
12191217
brelse(bitmap_bh);
1220-
error:
12211218
return ERR_PTR(err);
12221219
}
12231220

0 commit comments

Comments
 (0)