Skip to content

Commit 6229b41

Browse files
nagabiJames Bottomley
authored andcommitted
mpt2sas: setpci reset kernel oops fix
mpt2sas: setpci reset on nytro warpdrive card along with sysfs access and cli ioctl access resulted in kernel oops 1. pci_access_mutex lock added to provide synchronization between IOCTL, sysfs, PCI resource handling path 2. gioc_lock spinlock to protect list operations over multiple controllers >From c53a1cff4c07528b8b9ec7f6716e94950283e8f9 Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan <[email protected]> Date: Tue, 18 Aug 2015 11:58:13 +0530 Subject: [PATCH] mpt2sas setpci reset oops fix In mpt2sas driver due to lack of synchronization between ioctl, BRM status access through sysfs, pci resource removal kernel oops happen as ioctl path and BRM status sysfs access path still tries to access the removed resources Two locks added to provide syncrhonization 1. pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and pci resource handling. PCI resource freeing will lead to free vital hardware/memory resource, which might be in use by cli/sysfs path functions resulting in Null pointer reference followed by kernel crash. To avoid the above race condition we use mutex syncrhonization which ensures the syncrhonization between cli/sysfs_show path Note: pci_access_mutex is used only if nytro warpdrive cards (ioc->is_warpdrive based on device id) are used as we could not test this case with other SAS2 HBA cards We can remove this check if this behaviour confirmed from other cards. 2. spinlock on list operations over IOCs Case: when multiple warpdrive cards(IOCs) are in use Each IOC will added to the ioc list stucture on initialization. Watchdog threads run at regular intervals to check IOC for any fault conditions which will trigger the dead_ioc thread to deallocate pci resource, resulting deleting the IOC netry from list, this deletion need to protected by spinlock to enusre that ioc removal is syncrhonized, if not synchronized it might lead to list_del corruption as the ioc list is traversed in cli path Signed-off-by: Nagarajkumar Narayanan <[email protected]> Reviewed-by: Johannes Thumshirn <[email protected]> Acked-by: Sreekanth Reddy <[email protected]> Signed-off-by: James Bottomley <[email protected]>
1 parent 50acde8 commit 6229b41

File tree

4 files changed

+67
-9
lines changed

4 files changed

+67
-9
lines changed

drivers/scsi/mpt2sas/mpt2sas_base.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,12 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
112112
if (ret)
113113
return ret;
114114

115+
/* global ioc spinlock to protect controller list on list operations */
115116
printk(KERN_INFO "setting fwfault_debug(%d)\n", mpt2sas_fwfault_debug);
117+
spin_lock(&gioc_lock);
116118
list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
117119
ioc->fwfault_debug = mpt2sas_fwfault_debug;
120+
spin_unlock(&gioc_lock);
118121
return 0;
119122
}
120123

@@ -4437,6 +4440,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc)
44374440
dexitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
44384441
__func__));
44394442

4443+
/* synchronizing freeing resource with pci_access_mutex lock */
4444+
mutex_lock(&ioc->pci_access_mutex);
44404445
if (ioc->chip_phys && ioc->chip) {
44414446
_base_mask_interrupts(ioc);
44424447
ioc->shost_recovery = 1;
@@ -4456,6 +4461,7 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc)
44564461
pci_disable_pcie_error_reporting(pdev);
44574462
pci_disable_device(pdev);
44584463
}
4464+
mutex_unlock(&ioc->pci_access_mutex);
44594465
return;
44604466
}
44614467

drivers/scsi/mpt2sas/mpt2sas_base.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,12 @@ typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc);
817817
* @delayed_tr_list: target reset link list
818818
* @delayed_tr_volume_list: volume target reset link list
819819
* @@temp_sensors_count: flag to carry the number of temperature sensors
820+
* @pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and
821+
* pci resource handling. PCI resource freeing will lead to free
822+
* vital hardware/memory resource, which might be in use by cli/sysfs
823+
* path functions resulting in Null pointer reference followed by kernel
824+
* crash. To avoid the above race condition we use mutex syncrhonization
825+
* which ensures the syncrhonization between cli/sysfs_show path
820826
*/
821827
struct MPT2SAS_ADAPTER {
822828
struct list_head list;
@@ -1033,6 +1039,7 @@ struct MPT2SAS_ADAPTER {
10331039
u8 mfg_pg10_hide_flag;
10341040
u8 hide_drives;
10351041

1042+
struct mutex pci_access_mutex;
10361043
};
10371044

10381045
typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
@@ -1041,6 +1048,17 @@ typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
10411048

10421049
/* base shared API */
10431050
extern struct list_head mpt2sas_ioc_list;
1051+
/* spinlock on list operations over IOCs
1052+
* Case: when multiple warpdrive cards(IOCs) are in use
1053+
* Each IOC will added to the ioc list stucture on initialization.
1054+
* Watchdog threads run at regular intervals to check IOC for any
1055+
* fault conditions which will trigger the dead_ioc thread to
1056+
* deallocate pci resource, resulting deleting the IOC netry from list,
1057+
* this deletion need to protected by spinlock to enusre that
1058+
* ioc removal is syncrhonized, if not synchronized it might lead to
1059+
* list_del corruption as the ioc list is traversed in cli path
1060+
*/
1061+
extern spinlock_t gioc_lock;
10441062
void mpt2sas_base_start_watchdog(struct MPT2SAS_ADAPTER *ioc);
10451063
void mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc);
10461064

@@ -1119,7 +1137,6 @@ struct _sas_device *__mpt2sas_get_sdev_by_addr(
11191137
struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
11201138

11211139
void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc);
1122-
11231140
void mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase);
11241141

11251142
/* config shared API */

drivers/scsi/mpt2sas/mpt2sas_ctl.c

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -427,13 +427,16 @@ static int
427427
_ctl_verify_adapter(int ioc_number, struct MPT2SAS_ADAPTER **iocpp)
428428
{
429429
struct MPT2SAS_ADAPTER *ioc;
430-
430+
/* global ioc lock to protect controller on list operations */
431+
spin_lock(&gioc_lock);
431432
list_for_each_entry(ioc, &mpt2sas_ioc_list, list) {
432433
if (ioc->id != ioc_number)
433434
continue;
435+
spin_unlock(&gioc_lock);
434436
*iocpp = ioc;
435437
return ioc_number;
436438
}
439+
spin_unlock(&gioc_lock);
437440
*iocpp = NULL;
438441
return -1;
439442
}
@@ -522,10 +525,15 @@ _ctl_poll(struct file *filep, poll_table *wait)
522525

523526
poll_wait(filep, &ctl_poll_wait, wait);
524527

528+
/* global ioc lock to protect controller on list operations */
529+
spin_lock(&gioc_lock);
525530
list_for_each_entry(ioc, &mpt2sas_ioc_list, list) {
526-
if (ioc->aen_event_read_flag)
531+
if (ioc->aen_event_read_flag) {
532+
spin_unlock(&gioc_lock);
527533
return POLLIN | POLLRDNORM;
534+
}
528535
}
536+
spin_unlock(&gioc_lock);
529537
return 0;
530538
}
531539

@@ -2168,16 +2176,23 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
21682176

21692177
if (_ctl_verify_adapter(ioctl_header.ioc_number, &ioc) == -1 || !ioc)
21702178
return -ENODEV;
2179+
/* pci_access_mutex lock acquired by ioctl path */
2180+
mutex_lock(&ioc->pci_access_mutex);
21712181
if (ioc->shost_recovery || ioc->pci_error_recovery ||
2172-
ioc->is_driver_loading)
2173-
return -EAGAIN;
2182+
ioc->is_driver_loading || ioc->remove_host) {
2183+
ret = -EAGAIN;
2184+
goto out_unlock_pciaccess;
2185+
}
21742186

21752187
state = (file->f_flags & O_NONBLOCK) ? NON_BLOCKING : BLOCKING;
21762188
if (state == NON_BLOCKING) {
2177-
if (!mutex_trylock(&ioc->ctl_cmds.mutex))
2178-
return -EAGAIN;
2189+
if (!mutex_trylock(&ioc->ctl_cmds.mutex)) {
2190+
ret = -EAGAIN;
2191+
goto out_unlock_pciaccess;
2192+
}
21792193
} else if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) {
2180-
return -ERESTARTSYS;
2194+
ret = -ERESTARTSYS;
2195+
goto out_unlock_pciaccess;
21812196
}
21822197

21832198
switch (cmd) {
@@ -2258,6 +2273,8 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
22582273
}
22592274

22602275
mutex_unlock(&ioc->ctl_cmds.mutex);
2276+
out_unlock_pciaccess:
2277+
mutex_unlock(&ioc->pci_access_mutex);
22612278
return ret;
22622279
}
22632280

@@ -2711,6 +2728,12 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr,
27112728
"warpdrive\n", ioc->name, __func__);
27122729
goto out;
27132730
}
2731+
/* pci_access_mutex lock acquired by sysfs show path */
2732+
mutex_lock(&ioc->pci_access_mutex);
2733+
if (ioc->pci_error_recovery || ioc->remove_host) {
2734+
mutex_unlock(&ioc->pci_access_mutex);
2735+
return 0;
2736+
}
27142737

27152738
/* allocate upto GPIOVal 36 entries */
27162739
sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36);
@@ -2749,6 +2772,7 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr,
27492772

27502773
out:
27512774
kfree(io_unit_pg3);
2775+
mutex_unlock(&ioc->pci_access_mutex);
27522776
return rc;
27532777
}
27542778
static DEVICE_ATTR(BRM_status, S_IRUGO, _ctl_BRM_status_show, NULL);

drivers/scsi/mpt2sas/mpt2sas_scsih.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ static int _scsih_scan_finished(struct Scsi_Host *shost, unsigned long time);
7979

8080
/* global parameters */
8181
LIST_HEAD(mpt2sas_ioc_list);
82-
82+
/* global ioc lock for list operations */
83+
DEFINE_SPINLOCK(gioc_lock);
8384
/* local parameters */
8485
static u8 scsi_io_cb_idx = -1;
8586
static u8 tm_cb_idx = -1;
@@ -321,8 +322,10 @@ _scsih_set_debug_level(const char *val, struct kernel_param *kp)
321322
return ret;
322323

323324
printk(KERN_INFO "setting logging_level(0x%08x)\n", logging_level);
325+
spin_lock(&gioc_lock);
324326
list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
325327
ioc->logging_level = logging_level;
328+
spin_unlock(&gioc_lock);
326329
return 0;
327330
}
328331
module_param_call(logging_level, _scsih_set_debug_level, param_get_int,
@@ -8081,7 +8084,9 @@ _scsih_remove(struct pci_dev *pdev)
80818084
sas_remove_host(shost);
80828085
scsi_remove_host(shost);
80838086
mpt2sas_base_detach(ioc);
8087+
spin_lock(&gioc_lock);
80848088
list_del(&ioc->list);
8089+
spin_unlock(&gioc_lock);
80858090
scsi_host_put(shost);
80868091
}
80878092

@@ -8394,7 +8399,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
83948399
ioc = shost_priv(shost);
83958400
memset(ioc, 0, sizeof(struct MPT2SAS_ADAPTER));
83968401
INIT_LIST_HEAD(&ioc->list);
8402+
spin_lock(&gioc_lock);
83978403
list_add_tail(&ioc->list, &mpt2sas_ioc_list);
8404+
spin_unlock(&gioc_lock);
83988405
ioc->shost = shost;
83998406
ioc->id = mpt_ids++;
84008407
sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id);
@@ -8419,6 +8426,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
84198426
ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds;
84208427
/* misc semaphores and spin locks */
84218428
mutex_init(&ioc->reset_in_progress_mutex);
8429+
/* initializing pci_access_mutex lock */
8430+
mutex_init(&ioc->pci_access_mutex);
84228431
spin_lock_init(&ioc->ioc_reset_in_progress_lock);
84238432
spin_lock_init(&ioc->scsi_lookup_lock);
84248433
spin_lock_init(&ioc->sas_device_lock);
@@ -8521,7 +8530,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
85218530
out_attach_fail:
85228531
destroy_workqueue(ioc->firmware_event_thread);
85238532
out_thread_fail:
8533+
spin_lock(&gioc_lock);
85248534
list_del(&ioc->list);
8535+
spin_unlock(&gioc_lock);
85258536
scsi_host_put(shost);
85268537
return rv;
85278538
}

0 commit comments

Comments
 (0)