Skip to content

Commit de27f2b

Browse files
biger410 authored and Brian Maly committed
md: fix deadlock caused by sysfs_notify
The following deadlock was captured. The first process is holding 'kernfs_mutex' and is hung waiting on I/O. That I/O was staged in 'r1conf.pending_bio_list' of the raid1 device; this pending bio list would be flushed by the second process, 'md127_raid1', but that process was itself hung waiting on 'kernfs_mutex'. Replacing sysfs_notify() with sysfs_notify_dirent_safe() fixes it. There were other sysfs_notify() calls invoked from the I/O path; all of them were removed as well.

PID: 40430 TASK: ffff8ee9c8c65c40 CPU: 29 COMMAND: "probe_file"
#0 [ffffb87c4df37260] __schedule at ffffffff9a8678ec
#1 [ffffb87c4df372f8] schedule at ffffffff9a867f06
#2 [ffffb87c4df37310] io_schedule at ffffffff9a0c73e6
#3 [ffffb87c4df37328] __dta___xfs_iunpin_wait_3443 at ffffffffc03a4057 [xfs]
#4 [ffffb87c4df373a0] xfs_iunpin_wait at ffffffffc03a6c79 [xfs]
#5 [ffffb87c4df373b0] __dta_xfs_reclaim_inode_3357 at ffffffffc039a46c [xfs]
#6 [ffffb87c4df37400] xfs_reclaim_inodes_ag at ffffffffc039a8b6 [xfs]
#7 [ffffb87c4df37590] xfs_reclaim_inodes_nr at ffffffffc039bb33 [xfs]
#8 [ffffb87c4df375b0] xfs_fs_free_cached_objects at ffffffffc03af0e9 [xfs]
#9 [ffffb87c4df375c0] super_cache_scan at ffffffff9a287ec7
#10 [ffffb87c4df37618] shrink_slab at ffffffff9a1efd93
#11 [ffffb87c4df37700] shrink_node at ffffffff9a1f5968
#12 [ffffb87c4df37788] do_try_to_free_pages at ffffffff9a1f5ea2
#13 [ffffb87c4df377f0] try_to_free_mem_cgroup_pages at ffffffff9a1f6445
#14 [ffffb87c4df37880] try_charge at ffffffff9a26cc5f
#15 [ffffb87c4df37920] memcg_kmem_charge_memcg at ffffffff9a270f6a
#16 [ffffb87c4df37958] new_slab at ffffffff9a251430
#17 [ffffb87c4df379c0] ___slab_alloc at ffffffff9a251c85
#18 [ffffb87c4df37a80] __slab_alloc at ffffffff9a25635d
#19 [ffffb87c4df37ac0] kmem_cache_alloc at ffffffff9a251f89
#20 [ffffb87c4df37b00] alloc_inode at ffffffff9a2a2b10
#21 [ffffb87c4df37b20] iget_locked at ffffffff9a2a4854
#22 [ffffb87c4df37b60] kernfs_get_inode at ffffffff9a311377
#23 [ffffb87c4df37b80] kernfs_iop_lookup at ffffffff9a311e2b
#24 [ffffb87c4df37ba8] lookup_slow at ffffffff9a290118
#25 [ffffb87c4df37c10] walk_component at ffffffff9a291e83
#26 [ffffb87c4df37c78] path_lookupat at ffffffff9a293619
#27 [ffffb87c4df37cd8] filename_lookup at ffffffff9a2953af
#28 [ffffb87c4df37de8] user_path_at_empty at ffffffff9a295566
#29 [ffffb87c4df37e10] vfs_statx at ffffffff9a289787
#30 [ffffb87c4df37e70] SYSC_newlstat at ffffffff9a289d5d
#31 [ffffb87c4df37f18] sys_newlstat at ffffffff9a28a60e
#32 [ffffb87c4df37f28] do_syscall_64 at ffffffff9a003949
#33 [ffffb87c4df37f50] entry_SYSCALL_64_after_hwframe at ffffffff9aa001ad
RIP: 00007f617a5f2905 RSP: 00007f607334f838 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 00007f6064044b20 RCX: 00007f617a5f2905
RDX: 00007f6064044b20 RSI: 00007f6064044b20 RDI: 00007f6064005890
RBP: 00007f6064044aa0 R8: 0000000000000030 R9: 000000000000011c
R10: 0000000000000013 R11: 0000000000000246 R12: 00007f606417e6d0
R13: 00007f6064044aa0 R14: 00007f6064044b10 R15: 00000000ffffffff
ORIG_RAX: 0000000000000006 CS: 0033 SS: 002b

PID: 927 TASK: ffff8f15ac5dbd80 CPU: 42 COMMAND: "md127_raid1"
#0 [ffffb87c4df07b28] __schedule at ffffffff9a8678ec
#1 [ffffb87c4df07bc0] schedule at ffffffff9a867f06
#2 [ffffb87c4df07bd8] schedule_preempt_disabled at ffffffff9a86825e
#3 [ffffb87c4df07be8] __mutex_lock at ffffffff9a869bcc
#4 [ffffb87c4df07ca0] __mutex_lock_slowpath at ffffffff9a86a013
#5 [ffffb87c4df07cb0] mutex_lock at ffffffff9a86a04f
#6 [ffffb87c4df07cc8] kernfs_find_and_get_ns at ffffffff9a311d83
#7 [ffffb87c4df07cf0] sysfs_notify at ffffffff9a314b3a
#8 [ffffb87c4df07d18] md_update_sb at ffffffff9a688696
#9 [ffffb87c4df07d98] md_update_sb at ffffffff9a6886d5
#10 [ffffb87c4df07da8] md_check_recovery at ffffffff9a68ad9c
#11 [ffffb87c4df07dd0] raid1d at ffffffffc01f0375 [raid1]
#12 [ffffb87c4df07ea0] md_thread at ffffffff9a680348
#13 [ffffb87c4df07f08] kthread at ffffffff9a0b8005
#14 [ffffb87c4df07f50] ret_from_fork at ffffffff9aa00344

Signed-off-by: Junxiao Bi <[email protected]>
Signed-off-by: Song Liu <[email protected]>
(cherry picked from commit e1a86db)

Orabug: 31683116
Signed-off-by: Junxiao Bi <[email protected]>
Reviewed-by: Joe Jin <[email protected]>

Conflicts:
drivers/md/md-bitmap.c

Signed-off-by: Brian Maly <[email protected]>
1 parent 8600d42 commit de27f2b

File tree

5 files changed

+25
-15
lines changed

5 files changed

+25
-15
lines changed

drivers/md/bitmap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1597,7 +1597,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
15971597
s += blocks;
15981598
}
15991599
bitmap->last_end_sync = jiffies;
1600-
sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1600+
sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
16011601
}
16021602
EXPORT_SYMBOL(bitmap_cond_end_sync);
16031603

drivers/md/md.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2381,7 +2381,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
23812381
spin_unlock(&mddev->lock);
23822382
wake_up(&mddev->sb_wait);
23832383
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2384-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2384+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
23852385

23862386
rdev_for_each(rdev, mddev) {
23872387
if (test_and_clear_bit(FaultRecorded, &rdev->flags))
@@ -3534,7 +3534,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
35343534
mddev_resume(mddev);
35353535
if (!mddev->thread)
35363536
md_update_sb(mddev, 1);
3537-
sysfs_notify(&mddev->kobj, NULL, "level");
3537+
sysfs_notify_dirent_safe(mddev->sysfs_level);
35383538
md_new_event(mddev);
35393539
rv = len;
35403540
out_unlock:
@@ -4263,7 +4263,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
42634263
}
42644264
if (err)
42654265
return err;
4266-
sysfs_notify(&mddev->kobj, NULL, "degraded");
4266+
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
42674267
} else {
42684268
if (cmd_match(page, "check"))
42694269
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -4837,6 +4837,12 @@ static void md_free(struct kobject *ko)
48374837

48384838
if (mddev->sysfs_state)
48394839
sysfs_put(mddev->sysfs_state);
4840+
if (mddev->sysfs_completed)
4841+
sysfs_put(mddev->sysfs_completed);
4842+
if (mddev->sysfs_degraded)
4843+
sysfs_put(mddev->sysfs_degraded);
4844+
if (mddev->sysfs_level)
4845+
sysfs_put(mddev->sysfs_level);
48404846

48414847
if (mddev->queue)
48424848
blk_cleanup_queue(mddev->queue);
@@ -4969,6 +4975,9 @@ static int md_alloc(dev_t dev, char *name)
49694975
if (!error && mddev->kobj.sd) {
49704976
kobject_uevent(&mddev->kobj, KOBJ_ADD);
49714977
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4978+
mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4979+
mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4980+
mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
49724981
}
49734982
mddev_put(mddev);
49744983
return error;
@@ -5222,7 +5231,7 @@ int md_run(struct mddev *mddev)
52225231
md_new_event(mddev);
52235232
sysfs_notify_dirent_safe(mddev->sysfs_state);
52245233
sysfs_notify_dirent_safe(mddev->sysfs_action);
5225-
sysfs_notify(&mddev->kobj, NULL, "degraded");
5234+
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
52265235
return 0;
52275236
}
52285237
EXPORT_SYMBOL_GPL(md_run);
@@ -7796,7 +7805,7 @@ void md_do_sync(struct md_thread *thread)
77967805
} else
77977806
mddev->curr_resync = 3; /* no longer delayed */
77987807
mddev->curr_resync_completed = j;
7799-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7808+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
78007809
md_new_event(mddev);
78017810
update_time = jiffies;
78027811

@@ -7826,7 +7835,7 @@ void md_do_sync(struct md_thread *thread)
78267835
mddev->recovery_cp = j;
78277836
update_time = jiffies;
78287837
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7829-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7838+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
78307839
}
78317840

78327841
while (j >= mddev->resync_max &&
@@ -8009,7 +8018,7 @@ static int remove_and_add_spares(struct mddev *mddev,
80098018
}
80108019
}
80118020
if (removed && mddev->kobj.sd)
8012-
sysfs_notify(&mddev->kobj, NULL, "degraded");
8021+
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
80138022

80148023
if (this)
80158024
goto no_add;
@@ -8261,8 +8270,7 @@ void md_reap_sync_thread(struct mddev *mddev)
82618270
/* success...*/
82628271
/* activate any spares */
82638272
if (mddev->pers->spare_active(mddev)) {
8264-
sysfs_notify(&mddev->kobj, NULL,
8265-
"degraded");
8273+
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
82668274
set_bit(MD_CHANGE_DEVS, &mddev->flags);
82678275
}
82688276
}

drivers/md/md.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ struct md_rdev {
104104

105105
struct kernfs_node *sysfs_state; /* handle for 'state'
106106
* sysfs entry */
107-
108107
struct badblocks badblocks;
109108
};
110109
enum flag_bits {
@@ -357,6 +356,9 @@ struct mddev {
357356
* file in sysfs.
358357
*/
359358
struct kernfs_node *sysfs_action; /* handle for 'sync_action' */
359+
struct kernfs_node *sysfs_completed; /*handle for 'sync_completed' */
360+
struct kernfs_node *sysfs_degraded; /*handle for 'degraded' */
361+
struct kernfs_node *sysfs_level; /*handle for 'level' */
360362

361363
struct work_struct del_work; /* used for delayed sysfs removal */
362364

drivers/md/raid10.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4281,7 +4281,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
42814281
sector_nr = conf->reshape_progress;
42824282
if (sector_nr) {
42834283
mddev->curr_resync_completed = sector_nr;
4284-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
4284+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
42854285
*skipped = 1;
42864286
return sector_nr;
42874287
}

drivers/md/raid5.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5367,7 +5367,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
53675367
sector_div(sector_nr, new_data_disks);
53685368
if (sector_nr) {
53695369
mddev->curr_resync_completed = sector_nr;
5370-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5370+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
53715371
*skipped = 1;
53725372
return sector_nr;
53735373
}
@@ -5468,7 +5468,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
54685468
conf->reshape_safe = mddev->reshape_position;
54695469
spin_unlock_irq(&conf->device_lock);
54705470
wake_up(&conf->wait_for_overlap);
5471-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5471+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
54725472
}
54735473

54745474
INIT_LIST_HEAD(&stripes);
@@ -5564,7 +5564,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
55645564
conf->reshape_safe = mddev->reshape_position;
55655565
spin_unlock_irq(&conf->device_lock);
55665566
wake_up(&conf->wait_for_overlap);
5567-
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5567+
sysfs_notify_dirent_safe(mddev->sysfs_completed);
55685568
}
55695569
ret:
55705570
return reshape_sectors;

0 commit comments

Comments
 (0)