Skip to content

Commit 09f79fd

Browse files
anjalisinghai1Jeff Kirsher
authored andcommitted
i40e: avoid NVM acquire deadlock during NVM update
X722 devices use the AdminQ to access the NVM, and this requires taking the AdminQ lock. Because of this, we lock the AdminQ during i40e_read_nvm(), which is also called in places where the lock is already held, such as the firmware update path which wants to lock once and then unlock when finished after performing several tasks. Although this should have only affected X722 devices, commit 96a39ae ("i40e: Acquire NVM lock before reads on all devices", 2016-12-02) added locking for all NVM reads, regardless of device family. This resulted in us accidentally causing NVM acquire timeouts on all devices, causing failed firmware updates which left the eeprom in a corrupt state. Create unsafe non-locked variants of i40e_read_nvm_word and i40e_read_nvm_buffer, __i40e_read_nvm_word and __i40e_read_nvm_buffer respectively. These variants will not take the NVM lock and are expected to only be called in places where the NVM lock is already held if needed. Since the only caller of i40e_read_nvm_buffer() was in such a path, remove it entirely in favor of the unsafe version. If necessary we can always add it back in the future. Additionally, we now need to hold the NVM lock in i40e_validate_checksum because the call to i40e_calc_nvm_checksum now assumes that the NVM lock is held. We can further move the call to read I40E_SR_SW_CHECKSUM_WORD up a bit so that we do not need to acquire the NVM lock twice. This should resolve firmware updates and also fix potential raise that could have caused the driver to report an invalid NVM checksum upon driver load. Reported-by: Stefan Assmann <[email protected]> Fixes: 96a39ae ("i40e: Acquire NVM lock before reads on all devices", 2016-12-02) Signed-off-by: Anjali Singhai Jain <[email protected]> Signed-off-by: Jacob Keller <[email protected]> Tested-by: Andrew Bowers <[email protected]> Signed-off-by: Jeff Kirsher <[email protected]>
1 parent 6d9c153 commit 09f79fd

File tree

2 files changed

+60
-40
lines changed

2 files changed

+60
-40
lines changed

drivers/net/ethernet/intel/i40e/i40e_nvm.c

Lines changed: 60 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
266266
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
267267
* @data: word read from the Shadow RAM
268268
*
269-
* Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
269+
* Reads one 16 bit word from the Shadow RAM using the AdminQ
270270
**/
271271
static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
272272
u16 *data)
@@ -280,27 +280,49 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
280280
}
281281

282282
/**
283-
* i40e_read_nvm_word - Reads Shadow RAM
283+
* __i40e_read_nvm_word - Reads nvm word, assumes called does the locking
284284
* @hw: pointer to the HW structure
285285
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
286286
* @data: word read from the Shadow RAM
287287
*
288-
* Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
288+
* Reads one 16 bit word from the Shadow RAM.
289+
*
290+
* Do not use this function except in cases where the nvm lock is already
291+
* taken via i40e_acquire_nvm().
292+
**/
293+
static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
294+
u16 offset, u16 *data)
295+
{
296+
i40e_status ret_code = 0;
297+
298+
if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
299+
ret_code = i40e_read_nvm_word_aq(hw, offset, data);
300+
else
301+
ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
302+
return ret_code;
303+
}
304+
305+
/**
306+
* i40e_read_nvm_word - Reads nvm word and acquire lock if necessary
307+
* @hw: pointer to the HW structure
308+
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
309+
* @data: word read from the Shadow RAM
310+
*
311+
* Reads one 16 bit word from the Shadow RAM.
289312
**/
290313
i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
291314
u16 *data)
292315
{
293-
enum i40e_status_code ret_code = 0;
316+
i40e_status ret_code = 0;
294317

295318
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
296-
if (!ret_code) {
297-
if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
298-
ret_code = i40e_read_nvm_word_aq(hw, offset, data);
299-
} else {
300-
ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
301-
}
302-
i40e_release_nvm(hw);
303-
}
319+
if (ret_code)
320+
return ret_code;
321+
322+
ret_code = __i40e_read_nvm_word(hw, offset, data);
323+
324+
i40e_release_nvm(hw);
325+
304326
return ret_code;
305327
}
306328

@@ -393,31 +415,25 @@ static i40e_status i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
393415
}
394416

395417
/**
396-
* i40e_read_nvm_buffer - Reads Shadow RAM buffer
418+
* __i40e_read_nvm_buffer - Reads nvm buffer, caller must acquire lock
397419
* @hw: pointer to the HW structure
398420
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
399421
* @words: (in) number of words to read; (out) number of words actually read
400422
* @data: words read from the Shadow RAM
401423
*
402424
* Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
403-
* method. The buffer read is preceded by the NVM ownership take
404-
* and followed by the release.
425+
* method.
405426
**/
406-
i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
407-
u16 *words, u16 *data)
427+
static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
428+
u16 offset, u16 *words,
429+
u16 *data)
408430
{
409-
enum i40e_status_code ret_code = 0;
431+
i40e_status ret_code = 0;
410432

411-
if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
412-
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
413-
if (!ret_code) {
414-
ret_code = i40e_read_nvm_buffer_aq(hw, offset, words,
415-
data);
416-
i40e_release_nvm(hw);
417-
}
418-
} else {
433+
if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
434+
ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data);
435+
else
419436
ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
420-
}
421437
return ret_code;
422438
}
423439

@@ -499,15 +515,15 @@ static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
499515
data = (u16 *)vmem.va;
500516

501517
/* read pointer to VPD area */
502-
ret_code = i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module);
518+
ret_code = __i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module);
503519
if (ret_code) {
504520
ret_code = I40E_ERR_NVM_CHECKSUM;
505521
goto i40e_calc_nvm_checksum_exit;
506522
}
507523

508524
/* read pointer to PCIe Alt Auto-load module */
509-
ret_code = i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
510-
&pcie_alt_module);
525+
ret_code = __i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
526+
&pcie_alt_module);
511527
if (ret_code) {
512528
ret_code = I40E_ERR_NVM_CHECKSUM;
513529
goto i40e_calc_nvm_checksum_exit;
@@ -521,7 +537,7 @@ static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
521537
if ((i % I40E_SR_SECTOR_SIZE_IN_WORDS) == 0) {
522538
u16 words = I40E_SR_SECTOR_SIZE_IN_WORDS;
523539

524-
ret_code = i40e_read_nvm_buffer(hw, i, &words, data);
540+
ret_code = __i40e_read_nvm_buffer(hw, i, &words, data);
525541
if (ret_code) {
526542
ret_code = I40E_ERR_NVM_CHECKSUM;
527543
goto i40e_calc_nvm_checksum_exit;
@@ -593,14 +609,19 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
593609
u16 checksum_sr = 0;
594610
u16 checksum_local = 0;
595611

612+
/* We must acquire the NVM lock in order to correctly synchronize the
613+
* NVM accesses across multiple PFs. Without doing so it is possible
614+
* for one of the PFs to read invalid data potentially indicating that
615+
* the checksum is invalid.
616+
*/
617+
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
618+
if (ret_code)
619+
return ret_code;
596620
ret_code = i40e_calc_nvm_checksum(hw, &checksum_local);
621+
__i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
622+
i40e_release_nvm(hw);
597623
if (ret_code)
598-
goto i40e_validate_nvm_checksum_exit;
599-
600-
/* Do not use i40e_read_nvm_word() because we do not want to take
601-
* the synchronization semaphores twice here.
602-
*/
603-
i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
624+
return ret_code;
604625

605626
/* Verify read checksum from EEPROM is the same as
606627
* calculated checksum
@@ -612,7 +633,6 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
612633
if (checksum)
613634
*checksum = checksum_local;
614635

615-
i40e_validate_nvm_checksum_exit:
616636
return ret_code;
617637
}
618638

@@ -997,6 +1017,7 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
9971017
break;
9981018

9991019
case I40E_NVMUPD_CSUM_CON:
1020+
/* Assumes the caller has acquired the nvm */
10001021
status = i40e_update_nvm_checksum(hw);
10011022
if (status) {
10021023
*perrno = hw->aq.asq_last_status ?
@@ -1011,6 +1032,7 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
10111032
break;
10121033

10131034
case I40E_NVMUPD_CSUM_LCB:
1035+
/* Assumes the caller has acquired the nvm */
10141036
status = i40e_update_nvm_checksum(hw);
10151037
if (status) {
10161038
*perrno = hw->aq.asq_last_status ?

drivers/net/ethernet/intel/i40e/i40e_prototype.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,6 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
311311
void i40e_release_nvm(struct i40e_hw *hw);
312312
i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
313313
u16 *data);
314-
i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
315-
u16 *words, u16 *data);
316314
i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
317315
i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
318316
u16 *checksum);

0 commit comments

Comments
 (0)