Skip to content

Commit b2b9a8d

Browse files
emusln and davem330
authored and committed
ionic: avoid races in ionic_heartbeat_check
Rework the heartbeat checks to be sure that we're getting an atomic operation. Through testing we found occasions where a separate thread could clash with this check and cause erroneous heartbeat check results.

Signed-off-by: Allen Hubbe <[email protected]>
Signed-off-by: Shannon Nelson <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 230efff commit b2b9a8d

File tree

2 files changed

+63
-37
lines changed

2 files changed

+63
-37
lines changed

drivers/net/ethernet/pensando/ionic/ionic_dev.c

Lines changed: 58 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t)
2424
return;
2525

2626
hb = ionic_heartbeat_check(ionic);
27+
dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n",
28+
__func__, hb, netif_running(lif->netdev),
29+
test_bit(IONIC_LIF_F_UP, lif->state));
2730

2831
if (hb >= 0 &&
2932
!test_bit(IONIC_LIF_F_FW_RESET, lif->state))
@@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic)
9194
return -EFAULT;
9295
}
9396

94-
idev->last_fw_status = 0xff;
9597
timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
9698
ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
99+
100+
/* set times to ensure the first check will proceed */
101+
atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
102+
idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
103+
/* init as ready, so no transition if the first check succeeds */
104+
idev->last_fw_hb = 0;
105+
idev->fw_hb_ready = true;
106+
idev->fw_status_ready = true;
107+
97108
mod_timer(&ionic->watchdog_timer,
98109
round_jiffies(jiffies + ionic->watchdog_period));
99110

@@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic)
107118
int ionic_heartbeat_check(struct ionic *ionic)
108119
{
109120
struct ionic_dev *idev = &ionic->idev;
110-
unsigned long hb_time;
121+
unsigned long check_time, last_check_time;
122+
bool fw_status_ready, fw_hb_ready;
111123
u8 fw_status;
112-
u32 hb;
124+
u32 fw_hb;
113125

114-
/* wait a little more than one second before testing again */
115-
hb_time = jiffies;
116-
if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
126+
/* wait at least one second before testing again */
127+
check_time = jiffies;
128+
last_check_time = atomic_long_read(&idev->last_check_time);
129+
do_check_time:
130+
if (time_before(check_time, last_check_time + HZ))
117131
return 0;
132+
if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time,
133+
&last_check_time, check_time)) {
134+
/* if called concurrently, only the first should proceed. */
135+
dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__);
136+
goto do_check_time;
137+
}
118138

119139
/* firmware is useful only if the running bit is set and
120140
* fw_status != 0xff (bad PCI read)
121141
*/
122142
fw_status = ioread8(&idev->dev_info_regs->fw_status);
123-
if (fw_status != 0xff)
124-
fw_status &= IONIC_FW_STS_F_RUNNING; /* use only the run bit */
143+
fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
125144

126145
/* is this a transition? */
127-
if (fw_status != idev->last_fw_status &&
128-
idev->last_fw_status != 0xff) {
146+
if (fw_status_ready != idev->fw_status_ready) {
129147
struct ionic_lif *lif = ionic->lif;
130148
bool trigger = false;
131149

132-
if (!fw_status || fw_status == 0xff) {
150+
idev->fw_status_ready = fw_status_ready;
151+
152+
if (!fw_status_ready) {
133153
dev_info(ionic->dev, "FW stopped %u\n", fw_status);
134154
if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
135155
trigger = true;
@@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic)
143163
struct ionic_deferred_work *work;
144164

145165
work = kzalloc(sizeof(*work), GFP_ATOMIC);
146-
if (!work) {
147-
dev_err(ionic->dev, "LIF reset trigger dropped\n");
148-
} else {
166+
if (work) {
149167
work->type = IONIC_DW_TYPE_LIF_RESET;
150-
if (fw_status & IONIC_FW_STS_F_RUNNING &&
151-
fw_status != 0xff)
152-
work->fw_status = 1;
168+
work->fw_status = fw_status_ready;
153169
ionic_lif_deferred_enqueue(&lif->deferred, work);
154170
}
155171
}
156172
}
157-
idev->last_fw_status = fw_status;
158173

159-
if (!fw_status || fw_status == 0xff)
174+
if (!fw_status_ready)
160175
return -ENXIO;
161176

162-
/* early FW has no heartbeat, else FW will return non-zero */
163-
hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
164-
if (!hb)
177+
/* wait at least one watchdog period since the last heartbeat */
178+
last_check_time = idev->last_hb_time;
179+
if (time_before(check_time, last_check_time + ionic->watchdog_period))
165180
return 0;
166181

167-
/* are we stalled? */
168-
if (hb == idev->last_hb) {
169-
/* only complain once for each stall seen */
170-
if (idev->last_hb_time != 1) {
171-
dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
172-
idev->last_hb);
173-
idev->last_hb_time = 1;
174-
}
182+
fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
183+
fw_hb_ready = fw_hb != idev->last_fw_hb;
175184

176-
return -ENXIO;
185+
/* early FW version had no heartbeat, so fake it */
186+
if (!fw_hb_ready && !fw_hb)
187+
fw_hb_ready = true;
188+
189+
dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n",
190+
__func__, fw_hb, idev->last_fw_hb, fw_hb_ready);
191+
192+
idev->last_fw_hb = fw_hb;
193+
194+
/* log a transition */
195+
if (fw_hb_ready != idev->fw_hb_ready) {
196+
idev->fw_hb_ready = fw_hb_ready;
197+
if (!fw_hb_ready)
198+
dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb);
199+
else
200+
dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb);
177201
}
178202

179-
if (idev->last_hb_time == 1)
180-
dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb);
203+
if (!fw_hb_ready)
204+
return -ENXIO;
181205

182-
idev->last_hb = hb;
183-
idev->last_hb_time = hb_time;
206+
idev->last_hb_time = check_time;
184207

185208
return 0;
186209
}

drivers/net/ethernet/pensando/ionic/ionic_dev.h

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#ifndef _IONIC_DEV_H_
55
#define _IONIC_DEV_H_
66

7+
#include <linux/atomic.h>
78
#include <linux/mutex.h>
89
#include <linux/workqueue.h>
910

@@ -135,9 +136,11 @@ struct ionic_dev {
135136
union ionic_dev_info_regs __iomem *dev_info_regs;
136137
union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
137138

139+
atomic_long_t last_check_time;
138140
unsigned long last_hb_time;
139-
u32 last_hb;
140-
u8 last_fw_status;
141+
u32 last_fw_hb;
142+
bool fw_hb_ready;
143+
bool fw_status_ready;
141144

142145
u64 __iomem *db_pages;
143146
dma_addr_t phy_db_pages;

0 commit comments

Comments
 (0)