Skip to content

Commit ead08b9

Browse files
Stylon Wangalexdeucher
authored andcommitted
drm/amd/display: Fix race condition in DPIA AUX transfer
[Why] This fix was intended for improving on coding style but in the process uncovers a race condition, which explains why we are getting incorrect length in DPIA AUX replies. Due to the call path of DPIA AUX going from DC back to DM layer then again into DC and the added complexities on top of current DC AUX implementation, a proper fix to rely on current dc_lock to address the race condition is difficult without a major overhual on how DPIA AUX is implemented. [How] - Add a mutex dpia_aux_lock to protect DPIA AUX transfers - Remove DMUB_ASYNC_TO_SYNC_ACCESS_* codes and rely solely on aux_return_code_type for error reporting and handling - Separate SET_CONFIG from DPIA AUX transfer because they have quiet different processing logic - Remove unnecessary type casting to and from void * type Reviewed-by: Nicholas Kazlauskas <[email protected]> Acked-by: Jasdeep Dhillon <[email protected]> Signed-off-by: Stylon Wang <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 719b59a commit ead08b9

File tree

3 files changed

+89
-85
lines changed

3 files changed

+89
-85
lines changed

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

Lines changed: 71 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
146146
/* Number of bytes in PSP footer for firmware. */
147147
#define PSP_FOOTER_BYTES 0x100
148148

149-
/*
150-
* DMUB Async to Sync Mechanism Status
151-
*/
152-
#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
153-
#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
154-
#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
155-
#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4
156-
157149
/**
158150
* DOC: overview
159151
*
@@ -1441,6 +1433,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
14411433
memset(&init_params, 0, sizeof(init_params));
14421434
#endif
14431435

1436+
mutex_init(&adev->dm.dpia_aux_lock);
14441437
mutex_init(&adev->dm.dc_lock);
14451438
mutex_init(&adev->dm.audio_lock);
14461439

@@ -1805,6 +1798,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
18051798

18061799
mutex_destroy(&adev->dm.audio_lock);
18071800
mutex_destroy(&adev->dm.dc_lock);
1801+
mutex_destroy(&adev->dm.dpia_aux_lock);
18081802

18091803
return;
18101804
}
@@ -10204,91 +10198,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
1020410198
return value;
1020510199
}
1020610200

10207-
static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
10208-
struct dc_context *ctx,
10209-
uint8_t status_type,
10210-
uint32_t *operation_result)
10201+
int amdgpu_dm_process_dmub_aux_transfer_sync(
10202+
struct dc_context *ctx,
10203+
unsigned int link_index,
10204+
struct aux_payload *payload,
10205+
enum aux_return_code_type *operation_result)
1021110206
{
1021210207
struct amdgpu_device *adev = ctx->driver_context;
10213-
int return_status = -1;
1021410208
struct dmub_notification *p_notify = adev->dm.dmub_notify;
10209+
int ret = -1;
1021510210

10216-
if (is_cmd_aux) {
10217-
if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
10218-
return_status = p_notify->aux_reply.length;
10219-
*operation_result = p_notify->result;
10220-
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
10221-
*operation_result = AUX_RET_ERROR_TIMEOUT;
10222-
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
10223-
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
10224-
} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) {
10225-
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
10226-
} else {
10227-
*operation_result = AUX_RET_ERROR_UNKNOWN;
10211+
mutex_lock(&adev->dm.dpia_aux_lock);
10212+
if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
10213+
*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
10214+
goto out;
10215+
}
10216+
10217+
if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
10218+
DRM_ERROR("wait_for_completion_timeout timeout!");
10219+
*operation_result = AUX_RET_ERROR_TIMEOUT;
10220+
goto out;
10221+
}
10222+
10223+
if (p_notify->result != AUX_RET_SUCCESS) {
10224+
/*
10225+
* Transient states before tunneling is enabled could
10226+
* lead to this error. We can ignore this for now.
10227+
*/
10228+
if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
10229+
DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
10230+
payload->address, payload->length,
10231+
p_notify->result);
1022810232
}
10229-
} else {
10230-
if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
10231-
return_status = 0;
10232-
*operation_result = p_notify->sc_status;
10233-
} else {
10234-
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
10233+
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
10234+
goto out;
10235+
}
10236+
10237+
10238+
payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
10239+
if (!payload->write && p_notify->aux_reply.length &&
10240+
(payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
10241+
10242+
if (payload->length != p_notify->aux_reply.length) {
10243+
DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
10244+
p_notify->aux_reply.length,
10245+
payload->address, payload->length);
10246+
*operation_result = AUX_RET_ERROR_INVALID_REPLY;
10247+
goto out;
1023510248
}
10249+
10250+
memcpy(payload->data, p_notify->aux_reply.data,
10251+
p_notify->aux_reply.length);
1023610252
}
1023710253

10238-
return return_status;
10254+
/* success */
10255+
ret = p_notify->aux_reply.length;
10256+
*operation_result = p_notify->result;
10257+
out:
10258+
mutex_unlock(&adev->dm.dpia_aux_lock);
10259+
return ret;
1023910260
}
1024010261

10241-
int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
10242-
unsigned int link_index, void *cmd_payload, void *operation_result)
10262+
int amdgpu_dm_process_dmub_set_config_sync(
10263+
struct dc_context *ctx,
10264+
unsigned int link_index,
10265+
struct set_config_cmd_payload *payload,
10266+
enum set_config_status *operation_result)
1024310267
{
1024410268
struct amdgpu_device *adev = ctx->driver_context;
10245-
int ret = 0;
10269+
bool is_cmd_complete;
10270+
int ret;
1024610271

10247-
if (is_cmd_aux) {
10248-
dc_process_dmub_aux_transfer_async(ctx->dc,
10249-
link_index, (struct aux_payload *)cmd_payload);
10250-
} else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
10251-
(struct set_config_cmd_payload *)cmd_payload,
10252-
adev->dm.dmub_notify)) {
10253-
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
10254-
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
10255-
(uint32_t *)operation_result);
10256-
}
10272+
mutex_lock(&adev->dm.dpia_aux_lock);
10273+
is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
10274+
link_index, payload, adev->dm.dmub_notify);
1025710275

10258-
ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
10259-
if (ret == 0) {
10276+
if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
10277+
ret = 0;
10278+
*operation_result = adev->dm.dmub_notify->sc_status;
10279+
} else {
1026010280
DRM_ERROR("wait_for_completion_timeout timeout!");
10261-
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
10262-
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
10263-
(uint32_t *)operation_result);
10264-
}
10265-
10266-
if (is_cmd_aux) {
10267-
if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
10268-
struct aux_payload *payload = (struct aux_payload *)cmd_payload;
10269-
10270-
payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
10271-
if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
10272-
payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
10273-
10274-
if (payload->length != adev->dm.dmub_notify->aux_reply.length) {
10275-
DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n",
10276-
payload->address, payload->length,
10277-
adev->dm.dmub_notify->aux_reply.length);
10278-
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx,
10279-
DMUB_ASYNC_TO_SYNC_ACCESS_INVALID,
10280-
(uint32_t *)operation_result);
10281-
}
10282-
10283-
memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
10284-
adev->dm.dmub_notify->aux_reply.length);
10285-
}
10286-
}
10281+
ret = -1;
10282+
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
1028710283
}
1028810284

10289-
return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
10290-
ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
10291-
(uint32_t *)operation_result);
10285+
mutex_unlock(&adev->dm.dpia_aux_lock);
10286+
return ret;
1029210287
}
1029310288

1029410289
/*

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@
5959
#include "signal_types.h"
6060
#include "amdgpu_dm_crc.h"
6161
struct aux_payload;
62+
struct set_config_cmd_payload;
6263
enum aux_return_code_type;
64+
enum set_config_status;
6365

6466
/* Forward declarations */
6567
struct amdgpu_device;
@@ -542,6 +544,13 @@ struct amdgpu_display_manager {
542544
* occurred on certain intel platform
543545
*/
544546
bool aux_hpd_discon_quirk;
547+
548+
/**
549+
* @dpia_aux_lock:
550+
*
551+
* Guards access to DPIA AUX
552+
*/
553+
struct mutex dpia_aux_lock;
545554
};
546555

547556
enum dsc_clock_force_state {
@@ -785,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect(
785794

786795
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
787796

788-
int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
789-
struct dc_context *ctx, unsigned int link_index,
790-
void *payload, void *operation_result);
797+
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
798+
struct aux_payload *payload, enum aux_return_code_type *operation_result);
799+
800+
int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
801+
struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
791802

792803
bool check_seamless_boot_capability(struct amdgpu_device *adev);
793804

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -817,19 +817,17 @@ int dm_helper_dmub_aux_transfer_sync(
817817
struct aux_payload *payload,
818818
enum aux_return_code_type *operation_result)
819819
{
820-
return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
821-
link->link_index, (void *)payload,
822-
(void *)operation_result);
820+
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
821+
operation_result);
823822
}
824823

825824
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
826825
const struct dc_link *link,
827826
struct set_config_cmd_payload *payload,
828827
enum set_config_status *operation_result)
829828
{
830-
return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
831-
link->link_index, (void *)payload,
832-
(void *)operation_result);
829+
return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
830+
operation_result);
833831
}
834832

835833
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)

0 commit comments

Comments
 (0)