Skip to content

Commit 254361c

Browse files
ssanchez11 and dledford
authored and committed
IB/hfi1: Prevent LNI hang when LCB can't obtain lanes
When the LCB isn't able to get any lanes operational on the first transition into mission mode, the link transfer active never happens and the LNI stays in the polling state indefinitely. With firmware version 1.25.0 or later, reset the LCB upon receiving an 8051 interrupt so that the LCB can retry obtaining lanes. Also, replace the hard-coded LCB reset values in other parts of the code with macros to make the code more maintainable, and rename functions with the link_width label to link_mode to reflect the fact that those functions set and read link-related data, not just the link width. Reviewed-by: Michael J. Ruhl <[email protected]> Reviewed-by: Mike Marciniszyn <[email protected]> Signed-off-by: Sebastian Sanchez <[email protected]> Signed-off-by: Dennis Dalessandro <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent f5e27a2 commit 254361c

File tree

3 files changed

+53
-20
lines changed

3 files changed

+53
-20
lines changed

drivers/infiniband/hw/hfi1/chip.c

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,8 +1032,8 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
10321032
u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
10331033
static void read_vc_remote_link_width(struct hfi1_devdata *dd,
10341034
u8 *remote_tx_rate, u16 *link_widths);
1035-
static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
1036-
u8 *flag_bits, u16 *link_widths);
1035+
static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
1036+
u8 *flag_bits, u16 *link_widths);
10371037
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
10381038
u8 *device_rev);
10391039
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
@@ -6350,6 +6350,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
63506350
dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
63516351
type);
63526352
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
6353+
break;
6354+
case HREQ_LCB_RESET:
6355+
/* Put the LCB, RX FPE and TX FPE into reset */
6356+
write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET);
6357+
/* Make sure the write completed */
6358+
(void)read_csr(dd, DCC_CFG_RESET);
6359+
/* Hold the reset long enough to take effect */
6360+
udelay(1);
6361+
/* Take the LCB, RX FPE and TX FPE out of reset */
6362+
write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
6363+
hreq_response(dd, HREQ_SUCCESS, 0);
6364+
63536365
break;
63546366
case HREQ_CONFIG_DONE:
63556367
hreq_response(dd, HREQ_SUCCESS, 0);
@@ -6461,8 +6473,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
64616473
dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
64626474
reg = read_csr(dd, DCC_CFG_RESET);
64636475
write_csr(dd, DCC_CFG_RESET, reg |
6464-
(1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
6465-
(1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
6476+
DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE);
64666477
(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
64676478
if (!abort) {
64686479
udelay(1); /* must hold for the longer of 16cclks or 20ns */
@@ -6527,7 +6538,7 @@ static void _dc_start(struct hfi1_devdata *dd)
65276538
__func__);
65286539

65296540
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
6530-
write_csr(dd, DCC_CFG_RESET, 0x10);
6541+
write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
65316542
/* lcb_shutdown() with abort=1 does not restore these */
65326543
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
65336544
dd->dc_shutdown = 0;
@@ -7348,7 +7359,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
73487359
u8 misc_bits, local_flags;
73497360
u16 active_tx, active_rx;
73507361

7351-
read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
7362+
read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths);
73527363
tx = widths >> 12;
73537364
rx = (widths >> 8) & 0xf;
73547365

@@ -8820,29 +8831,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
88208831
GENERAL_CONFIG, frame);
88218832
}
88228833

8823-
static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
8824-
u8 *flag_bits, u16 *link_widths)
8834+
static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
8835+
u8 *flag_bits, u16 *link_widths)
88258836
{
88268837
u32 frame;
88278838

8828-
read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
8839+
read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
88298840
&frame);
88308841
*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
88318842
*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
88328843
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
88338844
}
88348845

8835-
static int write_vc_local_link_width(struct hfi1_devdata *dd,
8836-
u8 misc_bits,
8837-
u8 flag_bits,
8838-
u16 link_widths)
8846+
static int write_vc_local_link_mode(struct hfi1_devdata *dd,
8847+
u8 misc_bits,
8848+
u8 flag_bits,
8849+
u16 link_widths)
88398850
{
88408851
u32 frame;
88418852

88428853
frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
88438854
| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
88448855
| (u32)link_widths << LINK_WIDTH_SHIFT;
8845-
return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
8856+
return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
88468857
frame);
88478858
}
88488859

@@ -9312,8 +9323,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
93129323
if (loopback == LOOPBACK_SERDES)
93139324
misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT;
93149325

9315-
ret = write_vc_local_link_width(dd, misc_bits, 0,
9316-
opa_to_vc_link_widths(
9326+
/*
9327+
* An external device configuration request is used to reset the LCB
9328+
* to retry to obtain operational lanes when the first attempt is
9329+
* unsuccessful.
9330+
*/
9331+
if (dd->dc8051_ver >= dc8051_ver(1, 25, 0))
9332+
misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT;
9333+
9334+
ret = write_vc_local_link_mode(dd, misc_bits, 0,
9335+
opa_to_vc_link_widths(
93179336
ppd->link_width_enabled));
93189337
if (ret != HCMD_SUCCESS)
93199338
goto set_local_link_attributes_fail;

drivers/infiniband/hw/hfi1/chip.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,15 @@
196196
#define LSTATE_ARMED 0x3
197197
#define LSTATE_ACTIVE 0x4
198198

199+
/* DCC_CFG_RESET reset states */
200+
#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \
201+
DCC_CFG_RESET_RESET_TX_FPE | \
202+
DCC_CFG_RESET_RESET_RX_FPE | \
203+
DCC_CFG_RESET_ENABLE_CCLK_BCC)
204+
/* 0x17 */
205+
206+
#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */
207+
199208
/* DC8051_STS_CUR_STATE port values (physical link states) */
200209
#define PLS_DISABLED 0x30
201210
#define PLS_OFFLINE 0x90
@@ -283,6 +292,7 @@
283292
#define HREQ_SET_TX_EQ_ABS 0x04
284293
#define HREQ_SET_TX_EQ_REL 0x05
285294
#define HREQ_ENABLE 0x06
295+
#define HREQ_LCB_RESET 0x07
286296
#define HREQ_CONFIG_DONE 0xfe
287297
#define HREQ_INTERFACE_TEST 0xff
288298

@@ -383,7 +393,7 @@
383393
#define TX_SETTINGS 0x06
384394
#define VERIFY_CAP_LOCAL_PHY 0x07
385395
#define VERIFY_CAP_LOCAL_FABRIC 0x08
386-
#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09
396+
#define VERIFY_CAP_LOCAL_LINK_MODE 0x09
387397
#define LOCAL_DEVICE_ID 0x0a
388398
#define RESERVED_REGISTERS 0x0b
389399
#define LOCAL_LNI_INFO 0x0c
@@ -584,8 +594,9 @@ enum {
584594
#define LOOPBACK_LCB 2
585595
#define LOOPBACK_CABLE 3 /* external cable */
586596

587-
/* set up serdes bit in MISC_CONFIG_BITS */
597+
/* set up bits in MISC_CONFIG_BITS */
588598
#define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0
599+
#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3
589600

590601
/* read and write hardware registers */
591602
u64 read_csr(const struct hfi1_devdata *dd, u32 offset);

drivers/infiniband/hw/hfi1/chip_registers.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,11 @@
9797
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32
9898
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull
9999
#define DCC_CFG_RESET (DCC_CSRS + 0x000000000000)
100-
#define DCC_CFG_RESET_RESET_LCB_SHIFT 0
101-
#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2
100+
#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0)
101+
#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1)
102+
#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2)
103+
#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3)
104+
#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4)
102105
#define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028)
103106
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0
104107
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40

0 commit comments

Comments
 (0)