Skip to content

Commit be6297e

Browse files
committed
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
 "Scalability improvements when allocating inodes, and some miscellaneous bug fixes and cleanups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: avoid Y2038 overflow in recently_deleted()
  ext4: fix fault handling when mounted with -o dax,ro
  ext4: fix quota inconsistency during orphan cleanup for read-only mounts
  ext4: fix incorrect quotaoff if the quota feature is enabled
  ext4: remove useless test and assignment in strtohash functions
  ext4: backward compatibility support for Lustre ea_inode implementation
  ext4: remove timebomb in ext4_decode_extra_time()
  ext4: use sizeof(*ptr)
  ext4: in ext4_seek_{hole,data}, return -ENXIO for negative offsets
  ext4: reduce lock contention in __ext4_new_inode
  ext4: cleanup goto next group
  ext4: do not unnecessarily allocate buffer in recently_deleted()
2 parents 5791577 + b5f5157 commit be6297e

File tree

10 files changed

+222
-111
lines changed

10 files changed

+222
-111
lines changed

fs/ext4/dir.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
411411
{
412412
struct dir_private_info *p;
413413

414-
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
414+
p = kzalloc(sizeof(*p), GFP_KERNEL);
415415
if (!p)
416416
return NULL;
417417
p->curr_hash = pos2maj_hash(filp, pos);

fs/ext4/ext4.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -838,13 +838,11 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
838838
{
839839
if (unlikely(sizeof(time->tv_sec) > 4 &&
840840
(extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
841-
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0)
841+
842+
#if 1
842843
/* Handle legacy encoding of pre-1970 dates with epoch
843-
* bits 1,1. We assume that by kernel version 4.20,
844-
* everyone will have run fsck over the affected
845-
* filesystems to correct the problem. (This
846-
* backwards compatibility may be removed before this
847-
* time, at the discretion of the ext4 developers.)
844+
* bits 1,1. (This backwards compatibility may be removed
845+
* at the discretion of the ext4 developers.)
848846
*/
849847
u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
850848
if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
@@ -1567,6 +1565,7 @@ enum {
15671565
nolocking */
15681566
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
15691567
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
1568+
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
15701569
};
15711570

15721571
#define EXT4_INODE_BIT_FNS(name, field, offset) \

fs/ext4/file.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
279279
handle_t *handle = NULL;
280280
struct inode *inode = file_inode(vmf->vma->vm_file);
281281
struct super_block *sb = inode->i_sb;
282-
bool write = vmf->flags & FAULT_FLAG_WRITE;
282+
283+
/*
284+
* We have to distinguish real writes from writes which will result in a
285+
* COW page; COW writes should *not* poke the journal (the file will not
286+
* be changed). Doing so would cause unintended failures when mounted
287+
* read-only.
288+
*
289+
* We check for VM_SHARED rather than vmf->cow_page since the latter is
290+
* unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
291+
* other sizes, dax_iomap_fault will handle splitting / fallback so that
292+
* we eventually come back with a COW page.
293+
*/
294+
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
295+
(vmf->vma->vm_flags & VM_SHARED);
283296

284297
if (write) {
285298
sb_start_pagefault(sb);
@@ -595,7 +608,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
595608
inode_lock(inode);
596609

597610
isize = i_size_read(inode);
598-
if (offset >= isize) {
611+
if (offset < 0 || offset >= isize) {
599612
inode_unlock(inode);
600613
return -ENXIO;
601614
}
@@ -658,7 +671,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
658671
inode_lock(inode);
659672

660673
isize = i_size_read(inode);
661-
if (offset >= isize) {
674+
if (offset < 0 || offset >= isize) {
662675
inode_unlock(inode);
663676
return -ENXIO;
664677
}

fs/ext4/hash.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,6 @@ static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
148148
if (len > num*4)
149149
len = num * 4;
150150
for (i = 0; i < len; i++) {
151-
if ((i % 4) == 0)
152-
val = pad;
153151
val = ((int) scp[i]) + (val << 8);
154152
if ((i % 4) == 3) {
155153
*buf++ = val;
@@ -176,8 +174,6 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
176174
if (len > num*4)
177175
len = num * 4;
178176
for (i = 0; i < len; i++) {
179-
if ((i % 4) == 0)
180-
val = pad;
181177
val = ((int) ucp[i]) + (val << 8);
182178
if ((i % 4) == 3) {
183179
*buf++ = val;

fs/ext4/ialloc.c

Lines changed: 60 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -692,24 +692,25 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
692692
* somewhat arbitrary...)
693693
*/
694694
#define RECENTCY_MIN 5
695-
#define RECENTCY_DIRTY 30
695+
#define RECENTCY_DIRTY 300
696696

697697
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
698698
{
699699
struct ext4_group_desc *gdp;
700700
struct ext4_inode *raw_inode;
701701
struct buffer_head *bh;
702-
unsigned long dtime, now;
703-
int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
704-
int offset, ret = 0, recentcy = RECENTCY_MIN;
702+
int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
703+
int offset, ret = 0;
704+
int recentcy = RECENTCY_MIN;
705+
u32 dtime, now;
705706

706707
gdp = ext4_get_group_desc(sb, group, NULL);
707708
if (unlikely(!gdp))
708709
return 0;
709710

710-
bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
711+
bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
711712
(ino / inodes_per_block));
712-
if (unlikely(!bh) || !buffer_uptodate(bh))
713+
if (!bh || !buffer_uptodate(bh))
713714
/*
714715
* If the block is not in the buffer cache, then it
715716
* must have been written out.
@@ -718,18 +719,45 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
718719

719720
offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
720721
raw_inode = (struct ext4_inode *) (bh->b_data + offset);
722+
723+
/* i_dtime is only 32 bits on disk, but we only care about relative
724+
* times in the range of a few minutes (i.e. long enough to sync a
725+
* recently-deleted inode to disk), so using the low 32 bits of the
726+
* clock (a 68 year range) is enough, see time_before32() */
721727
dtime = le32_to_cpu(raw_inode->i_dtime);
722-
now = get_seconds();
728+
now = ktime_get_real_seconds();
723729
if (buffer_dirty(bh))
724730
recentcy += RECENTCY_DIRTY;
725731

726-
if (dtime && (dtime < now) && (now < dtime + recentcy))
732+
if (dtime && time_before32(dtime, now) &&
733+
time_before32(now, dtime + recentcy))
727734
ret = 1;
728735
out:
729736
brelse(bh);
730737
return ret;
731738
}
732739

740+
static int find_inode_bit(struct super_block *sb, ext4_group_t group,
741+
struct buffer_head *bitmap, unsigned long *ino)
742+
{
743+
next:
744+
*ino = ext4_find_next_zero_bit((unsigned long *)
745+
bitmap->b_data,
746+
EXT4_INODES_PER_GROUP(sb), *ino);
747+
if (*ino >= EXT4_INODES_PER_GROUP(sb))
748+
return 0;
749+
750+
if ((EXT4_SB(sb)->s_journal == NULL) &&
751+
recently_deleted(sb, group, *ino)) {
752+
*ino = *ino + 1;
753+
if (*ino < EXT4_INODES_PER_GROUP(sb))
754+
goto next;
755+
return 0;
756+
}
757+
758+
return 1;
759+
}
760+
733761
/*
734762
* There are two policies for allocating an inode. If the new inode is
735763
* a directory, then a forward search is made for a block group with both
@@ -892,47 +920,34 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
892920
/*
893921
* Check free inodes count before loading bitmap.
894922
*/
895-
if (ext4_free_inodes_count(sb, gdp) == 0) {
896-
if (++group == ngroups)
897-
group = 0;
898-
continue;
899-
}
923+
if (ext4_free_inodes_count(sb, gdp) == 0)
924+
goto next_group;
900925

901926
grp = ext4_get_group_info(sb, group);
902927
/* Skip groups with already-known suspicious inode tables */
903-
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
904-
if (++group == ngroups)
905-
group = 0;
906-
continue;
907-
}
928+
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
929+
goto next_group;
908930

909931
brelse(inode_bitmap_bh);
910932
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
911933
/* Skip groups with suspicious inode tables */
912934
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) ||
913935
IS_ERR(inode_bitmap_bh)) {
914936
inode_bitmap_bh = NULL;
915-
if (++group == ngroups)
916-
group = 0;
917-
continue;
937+
goto next_group;
918938
}
919939

920940
repeat_in_this_group:
921-
ino = ext4_find_next_zero_bit((unsigned long *)
922-
inode_bitmap_bh->b_data,
923-
EXT4_INODES_PER_GROUP(sb), ino);
924-
if (ino >= EXT4_INODES_PER_GROUP(sb))
941+
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
942+
if (!ret2)
925943
goto next_group;
926-
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
944+
945+
if (group == 0 && (ino + 1) < EXT4_FIRST_INO(sb)) {
927946
ext4_error(sb, "reserved inode found cleared - "
928947
"inode=%lu", ino + 1);
929-
continue;
930-
}
931-
if ((EXT4_SB(sb)->s_journal == NULL) &&
932-
recently_deleted(sb, group, ino)) {
933-
ino++;
934-
goto next_inode;
948+
goto next_group;
935949
}
950+
936951
if (!handle) {
937952
BUG_ON(nblocks <= 0);
938953
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
@@ -952,11 +967,23 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
952967
}
953968
ext4_lock_group(sb, group);
954969
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
970+
if (ret2) {
971+
/* Someone already took the bit. Repeat the search
972+
* with lock held.
973+
*/
974+
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
975+
if (ret2) {
976+
ext4_set_bit(ino, inode_bitmap_bh->b_data);
977+
ret2 = 0;
978+
} else {
979+
ret2 = 1; /* we didn't grab the inode */
980+
}
981+
}
955982
ext4_unlock_group(sb, group);
956983
ino++; /* the inode bitmap is zero-based */
957984
if (!ret2)
958985
goto got; /* we grabbed the inode! */
959-
next_inode:
986+
960987
if (ino < EXT4_INODES_PER_GROUP(sb))
961988
goto repeat_in_this_group;
962989
next_group:

fs/ext4/inode.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4897,14 +4897,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
48974897
brelse(iloc.bh);
48984898
ext4_set_inode_flags(inode);
48994899

4900-
if (ei->i_flags & EXT4_EA_INODE_FL) {
4901-
ext4_xattr_inode_set_class(inode);
4902-
4903-
inode_lock(inode);
4904-
inode->i_flags |= S_NOQUOTA;
4905-
inode_unlock(inode);
4906-
}
4907-
49084900
unlock_new_inode(inode);
49094901
return inode;
49104902

fs/ext4/mmp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
367367
goto failed;
368368
}
369369

370-
mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
370+
mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
371371
if (!mmpd_data) {
372372
ext4_warning(sb, "not enough memory for mmpd_data");
373373
goto failed;

fs/ext4/super.c

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,6 +2404,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
24042404
unsigned int s_flags = sb->s_flags;
24052405
int ret, nr_orphans = 0, nr_truncates = 0;
24062406
#ifdef CONFIG_QUOTA
2407+
int quota_update = 0;
24072408
int i;
24082409
#endif
24092410
if (!es->s_last_orphan) {
@@ -2442,14 +2443,32 @@ static void ext4_orphan_cleanup(struct super_block *sb,
24422443
#ifdef CONFIG_QUOTA
24432444
/* Needed for iput() to work correctly and not trash data */
24442445
sb->s_flags |= MS_ACTIVE;
2445-
/* Turn on quotas so that they are updated correctly */
2446+
2447+
/*
2448+
* Turn on quotas which were not enabled for read-only mounts if
2449+
* filesystem has quota feature, so that they are updated correctly.
2450+
*/
2451+
if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
2452+
int ret = ext4_enable_quotas(sb);
2453+
2454+
if (!ret)
2455+
quota_update = 1;
2456+
else
2457+
ext4_msg(sb, KERN_ERR,
2458+
"Cannot turn on quotas: error %d", ret);
2459+
}
2460+
2461+
/* Turn on journaled quotas used for old style */
24462462
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
24472463
if (EXT4_SB(sb)->s_qf_names[i]) {
24482464
int ret = ext4_quota_on_mount(sb, i);
2449-
if (ret < 0)
2465+
2466+
if (!ret)
2467+
quota_update = 1;
2468+
else
24502469
ext4_msg(sb, KERN_ERR,
24512470
"Cannot turn on journaled "
2452-
"quota: error %d", ret);
2471+
"quota: type %d: error %d", i, ret);
24532472
}
24542473
}
24552474
#endif
@@ -2510,10 +2529,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
25102529
ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
25112530
PLURAL(nr_truncates));
25122531
#ifdef CONFIG_QUOTA
2513-
/* Turn quotas off */
2514-
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2515-
if (sb_dqopt(sb)->files[i])
2516-
dquot_quota_off(sb, i);
2532+
/* Turn off quotas if they were enabled for orphan cleanup */
2533+
if (quota_update) {
2534+
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2535+
if (sb_dqopt(sb)->files[i])
2536+
dquot_quota_off(sb, i);
2537+
}
25172538
}
25182539
#endif
25192540
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -5512,6 +5533,9 @@ static int ext4_enable_quotas(struct super_block *sb)
55125533
DQUOT_USAGE_ENABLED |
55135534
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
55145535
if (err) {
5536+
for (type--; type >= 0; type--)
5537+
dquot_quota_off(sb, type);
5538+
55155539
ext4_warning(sb,
55165540
"Failed to enable quota tracking "
55175541
"(type=%d, err=%d). Please run "

0 commit comments

Comments
 (0)