Skip to content

Commit f0eb870

Browse files
committed
Merge tag 'xfs-5.14-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Darrick Wong: "A few fixes for issues in the new online shrink code, additional corrections for my recent bug-hunt w.r.t. extent size hints on realtime, and improved input checking of the GROWFSRT ioctl. IOW, the usual 'I somehow got bored during the merge window and resumed auditing the farther reaches of xfs': - Fix shrink eligibility checking when sparse inode clusters are enabled - Reset '..' directory entries when unlinking directories to prevent verifier errors if the fs is shrunk later - Don't report unusable extent size hints to FSGETXATTR - Don't warn when extent size hints are unusable because the sysadmin configured them that way - Fix insufficient parameter validation in GROWFSRT ioctl - Fix integer overflow when adding rt volumes to filesystem" * tag 'xfs-5.14-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: detect misaligned rtinherit directory extent size hints xfs: fix an integer overflow error in xfs_growfs_rt xfs: improve FSGROWFSRT precondition checking xfs: don't expose misaligned extszinherit hints to userspace xfs: correct the narrative around misaligned rtinherit/extszinherit dirs xfs: reset child dir '..' entry when unlinking child xfs: check for sparse inode clusters that cross new EOAG when shrinking
2 parents fbf1bdd + b102a46 commit f0eb870

File tree

9 files changed

+174
-37
lines changed

9 files changed

+174
-37
lines changed

fs/xfs/libxfs/xfs_ag.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,14 @@ xfs_ag_shrink_space(
803803

804804
args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);
805805

806+
/*
807+
* Make sure that the last inode cluster cannot overlap with the new
808+
* end of the AG, even if it's sparse.
809+
*/
810+
error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
811+
if (error)
812+
return error;
813+
806814
/*
807815
* Disable perag reservations so it doesn't cause the allocation request
808816
* to fail. We'll reestablish reservation before we return.

fs/xfs/libxfs/xfs_ialloc.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2928,3 +2928,58 @@ xfs_ialloc_calc_rootino(
29282928

29292929
return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
29302930
}
2931+
2932+
/*
2933+
* Ensure there are not sparse inode clusters that cross the new EOAG.
2934+
*
2935+
* This is a no-op for non-spinode filesystems since clusters are always fully
2936+
* allocated and checking the bnobt suffices. However, a spinode filesystem
2937+
* could have a record where the upper inodes are free blocks. If those blocks
2938+
* were removed from the filesystem, the inode record would extend beyond EOAG,
2939+
* which will be flagged as corruption.
2940+
*/
2941+
int
2942+
xfs_ialloc_check_shrink(
2943+
struct xfs_trans *tp,
2944+
xfs_agnumber_t agno,
2945+
struct xfs_buf *agibp,
2946+
xfs_agblock_t new_length)
2947+
{
2948+
struct xfs_inobt_rec_incore rec;
2949+
struct xfs_btree_cur *cur;
2950+
struct xfs_mount *mp = tp->t_mountp;
2951+
struct xfs_perag *pag;
2952+
xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length);
2953+
int has;
2954+
int error;
2955+
2956+
if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
2957+
return 0;
2958+
2959+
pag = xfs_perag_get(mp, agno);
2960+
cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);
2961+
2962+
/* Look up the inobt record that would correspond to the new EOFS. */
2963+
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
2964+
if (error || !has)
2965+
goto out;
2966+
2967+
error = xfs_inobt_get_rec(cur, &rec, &has);
2968+
if (error)
2969+
goto out;
2970+
2971+
if (!has) {
2972+
error = -EFSCORRUPTED;
2973+
goto out;
2974+
}
2975+
2976+
/* If the record covers inodes that would be beyond EOFS, bail out. */
2977+
if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) {
2978+
error = -ENOSPC;
2979+
goto out;
2980+
}
2981+
out:
2982+
xfs_btree_del_cursor(cur, error);
2983+
xfs_perag_put(pag);
2984+
return error;
2985+
}

fs/xfs/libxfs/xfs_ialloc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,4 +122,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
122122
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
123123
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
124124

125+
int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno,
126+
struct xfs_buf *agibp, xfs_agblock_t new_length);
127+
125128
#endif /* __XFS_IALLOC_H__ */

fs/xfs/libxfs/xfs_inode_buf.c

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -592,23 +592,27 @@ xfs_inode_validate_extsize(
592592
/*
593593
* This comment describes a historic gap in this verifier function.
594594
*
595-
* On older kernels, the extent size hint verifier doesn't check that
596-
* the extent size hint is an integer multiple of the realtime extent
597-
* size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
598-
* The verifier has always enforced the alignment rule for regular
599-
* files with the REALTIME flag set.
595+
* For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
596+
* function has never checked that the extent size hint is an integer
597+
* multiple of the realtime extent size. Since we allow users to set
598+
* this combination on non-rt filesystems /and/ to change the rt
599+
* extent size when adding a rt device to a filesystem, the net effect
600+
* is that users can configure a filesystem anticipating one rt
601+
* geometry and change their minds later. Directories do not use the
602+
* extent size hint, so this is harmless for them.
600603
*
601604
* If a directory with a misaligned extent size hint is allowed to
602605
* propagate that hint into a new regular realtime file, the result
603606
* is that the inode cluster buffer verifier will trigger a corruption
604-
* shutdown the next time it is run.
607+
* shutdown the next time it is run, because the verifier has always
608+
* enforced the alignment rule for regular files.
605609
*
606-
* Unfortunately, there could be filesystems with these misconfigured
607-
* directories in the wild, so we cannot add a check to this verifier
608-
* at this time because that will result a new source of directory
609-
* corruption errors when reading an existing filesystem. Instead, we
610-
* permit the misconfiguration to pass through the verifiers so that
611-
* callers of this function can correct and mitigate externally.
610+
* Because we allow administrators to set a new rt extent size when
611+
* adding a rt section, we cannot add a check to this verifier because
612+
* that will result in a new source of directory corruption errors when
613+
* reading an existing filesystem. Instead, we rely on callers to
614+
* decide when alignment checks are appropriate, and fix things up as
615+
* needed.
612616
*/
613617

614618
if (rt_flag)

fs/xfs/libxfs/xfs_trans_inode.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,16 +143,14 @@ xfs_trans_log_inode(
143143
}
144144

145145
/*
146-
* Inode verifiers on older kernels don't check that the extent size
147-
* hint is an integer multiple of the rt extent size on a directory
148-
* with both rtinherit and extszinherit flags set. If we're logging a
149-
* directory that is misconfigured in this way, clear the hint.
146+
* Inode verifiers do not check that the extent size hint is an integer
147+
* multiple of the rt extent size on a directory with both rtinherit
148+
* and extszinherit flags set. If we're logging a directory that is
149+
* misconfigured in this way, clear the hint.
150150
*/
151151
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
152152
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
153153
(ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
154-
xfs_info_once(ip->i_mount,
155-
"Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
156154
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
157155
XFS_DIFLAG_EXTSZINHERIT);
158156
ip->i_extsize = 0;

fs/xfs/scrub/inode.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,25 @@ xchk_inode_extsize(
7373
uint16_t flags)
7474
{
7575
xfs_failaddr_t fa;
76+
uint32_t value = be32_to_cpu(dip->di_extsize);
7677

77-
fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
78-
mode, flags);
78+
fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags);
7979
if (fa)
8080
xchk_ino_set_corrupt(sc, ino);
81+
82+
/*
83+
* XFS allows a sysadmin to change the rt extent size when adding a rt
84+
* section to a filesystem after formatting. If there are any
85+
* directories with extszinherit and rtinherit set, the hint could
86+
* become misaligned with the new rextsize. The verifier doesn't check
87+
* this, because we allow rtinherit directories even without an rt
88+
* device. Flag this as an administrative warning since we will clean
89+
* this up eventually.
90+
*/
91+
if ((flags & XFS_DIFLAG_RTINHERIT) &&
92+
(flags & XFS_DIFLAG_EXTSZINHERIT) &&
93+
value % sc->mp->m_sb.sb_rextsize > 0)
94+
xchk_ino_set_warning(sc, ino);
8195
}
8296

8397
/*

fs/xfs/xfs_inode.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2763,6 +2763,19 @@ xfs_remove(
27632763
error = xfs_droplink(tp, ip);
27642764
if (error)
27652765
goto out_trans_cancel;
2766+
2767+
/*
2768+
* Point the unlinked child directory's ".." entry to the root
2769+
* directory to eliminate back-references to inodes that may
2770+
* get freed before the child directory is closed. If the fs
2771+
* gets shrunk, this can lead to dirent inode validation errors.
2772+
*/
2773+
if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
2774+
error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
2775+
tp->t_mountp->m_sb.sb_rootino, 0);
2776+
if (error)
2777+
return error;
2778+
}
27662779
} else {
27672780
/*
27682781
* When removing a non-directory we need to log the parent

fs/xfs/xfs_ioctl.c

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,24 @@ xfs_fill_fsxattr(
10651065

10661066
fileattr_fill_xflags(fa, xfs_ip2xflags(ip));
10671067

1068-
fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
1068+
if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) {
1069+
fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
1070+
} else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
1071+
/*
1072+
* Don't let a misaligned extent size hint on a directory
1073+
* escape to userspace if it won't pass the setattr checks
1074+
* later.
1075+
*/
1076+
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1077+
ip->i_extsize % mp->m_sb.sb_rextsize > 0) {
1078+
fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE |
1079+
FS_XFLAG_EXTSZINHERIT);
1080+
fa->fsx_extsize = 0;
1081+
} else {
1082+
fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
1083+
}
1084+
}
1085+
10691086
if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
10701087
fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize);
10711088
fa->fsx_projid = ip->i_projid;
@@ -1292,10 +1309,10 @@ xfs_ioctl_setattr_check_extsize(
12921309
new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
12931310

12941311
/*
1295-
* Inode verifiers on older kernels don't check that the extent size
1296-
* hint is an integer multiple of the rt extent size on a directory
1297-
* with both rtinherit and extszinherit flags set. Don't let sysadmins
1298-
* misconfigure directories.
1312+
* Inode verifiers do not check that the extent size hint is an integer
1313+
* multiple of the rt extent size on a directory with both rtinherit
1314+
* and extszinherit flags set. Don't let sysadmins misconfigure
1315+
* directories.
12991316
*/
13001317
if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
13011318
(new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {

fs/xfs/xfs_rtalloc.c

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -923,16 +923,41 @@ xfs_growfs_rt(
923923
uint8_t *rsum_cache; /* old summary cache */
924924

925925
sbp = &mp->m_sb;
926-
/*
927-
* Initial error checking.
928-
*/
926+
929927
if (!capable(CAP_SYS_ADMIN))
930928
return -EPERM;
931-
if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
932-
(nrblocks = in->newblocks) <= sbp->sb_rblocks ||
933-
(sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
929+
930+
/* Needs to have been mounted with an rt device. */
931+
if (!XFS_IS_REALTIME_MOUNT(mp))
932+
return -EINVAL;
933+
/*
934+
* Mount should fail if the rt bitmap/summary files don't load, but
935+
* we'll check anyway.
936+
*/
937+
if (!mp->m_rbmip || !mp->m_rsumip)
938+
return -EINVAL;
939+
940+
/* Shrink not supported. */
941+
if (in->newblocks <= sbp->sb_rblocks)
942+
return -EINVAL;
943+
944+
/* Can only change rt extent size when adding rt volume. */
945+
if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
946+
return -EINVAL;
947+
948+
/* Range check the extent size. */
949+
if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
950+
XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
934951
return -EINVAL;
935-
if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
952+
953+
/* Unsupported realtime features. */
954+
if (xfs_sb_version_hasrmapbt(&mp->m_sb) ||
955+
xfs_sb_version_hasreflink(&mp->m_sb))
956+
return -EOPNOTSUPP;
957+
958+
nrblocks = in->newblocks;
959+
error = xfs_sb_validate_fsb_count(sbp, nrblocks);
960+
if (error)
936961
return error;
937962
/*
938963
* Read in the last block of the device, make sure it exists.
@@ -996,7 +1021,8 @@ xfs_growfs_rt(
9961021
((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
9971022
bmbno < nrbmblocks;
9981023
bmbno++) {
999-
xfs_trans_t *tp;
1024+
struct xfs_trans *tp;
1025+
xfs_rfsblock_t nrblocks_step;
10001026

10011027
*nmp = *mp;
10021028
nsbp = &nmp->m_sb;
@@ -1005,10 +1031,9 @@ xfs_growfs_rt(
10051031
*/
10061032
nsbp->sb_rextsize = in->extsize;
10071033
nsbp->sb_rbmblocks = bmbno + 1;
1008-
nsbp->sb_rblocks =
1009-
XFS_RTMIN(nrblocks,
1010-
nsbp->sb_rbmblocks * NBBY *
1011-
nsbp->sb_blocksize * nsbp->sb_rextsize);
1034+
nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
1035+
nsbp->sb_rextsize;
1036+
nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
10121037
nsbp->sb_rextents = nsbp->sb_rblocks;
10131038
do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
10141039
ASSERT(nsbp->sb_rextents != 0);

0 commit comments

Comments
 (0)