Commit 9d9f720

Merge tag 'amd-drm-fixes-5.18-2022-04-27' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-5.18-2022-04-27:

amdgpu:
- Runtime pm fix
- DCN memory leak fix in error path
- SI DPM deadlock fix
- S0ix fix

amdkfd:
- GWS fix
- GWS support for CRIU

Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]

2 parents 22c73ba + fb8cc33

File tree

10 files changed: +165, -140 lines

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 70 additions & 35 deletions
@@ -2395,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev)
 	return amdgpu_device_resume(drm_dev, true);
 }
 
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+	if (adev->mode_info.num_crtc) {
+		struct drm_connector *list_connector;
+		struct drm_connector_list_iter iter;
+		int ret = 0;
+
+		/* XXX: Return busy if any displays are connected to avoid
+		 * possible display wakeups after runtime resume due to
+		 * hotplug events in case any displays were connected while
+		 * the GPU was in suspend. Remove this once that is fixed.
+		 */
+		mutex_lock(&drm_dev->mode_config.mutex);
+		drm_connector_list_iter_begin(drm_dev, &iter);
+		drm_for_each_connector_iter(list_connector, &iter) {
+			if (list_connector->status == connector_status_connected) {
+				ret = -EBUSY;
+				break;
+			}
+		}
+		drm_connector_list_iter_end(&iter);
+		mutex_unlock(&drm_dev->mode_config.mutex);
+
+		if (ret)
+			return ret;
+
+		if (amdgpu_device_has_dc_support(adev)) {
+			struct drm_crtc *crtc;
+
+			drm_for_each_crtc(crtc, drm_dev) {
+				drm_modeset_lock(&crtc->mutex, NULL);
+				if (crtc->state->active)
+					ret = -EBUSY;
+				drm_modeset_unlock(&crtc->mutex);
+				if (ret < 0)
+					break;
+			}
+		} else {
+			mutex_lock(&drm_dev->mode_config.mutex);
+			drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+			drm_connector_list_iter_begin(drm_dev, &iter);
+			drm_for_each_connector_iter(list_connector, &iter) {
+				if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+					ret = -EBUSY;
+					break;
+				}
+			}
+
+			drm_connector_list_iter_end(&iter);
+
+			drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+			mutex_unlock(&drm_dev->mode_config.mutex);
+		}
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int amdgpu_pmops_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -2407,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 		return -EBUSY;
 	}
 
+	ret = amdgpu_runtime_idle_check_display(dev);
+	if (ret)
+		return ret;
+
 	/* wait for all rings to drain before suspending */
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -2516,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
 		return -EBUSY;
 	}
 
-	if (amdgpu_device_has_dc_support(adev)) {
-		struct drm_crtc *crtc;
-
-		drm_for_each_crtc(crtc, drm_dev) {
-			drm_modeset_lock(&crtc->mutex, NULL);
-			if (crtc->state->active)
-				ret = -EBUSY;
-			drm_modeset_unlock(&crtc->mutex);
-			if (ret < 0)
-				break;
-		}
-
-	} else {
-		struct drm_connector *list_connector;
-		struct drm_connector_list_iter iter;
-
-		mutex_lock(&drm_dev->mode_config.mutex);
-		drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
-
-		drm_connector_list_iter_begin(drm_dev, &iter);
-		drm_for_each_connector_iter(list_connector, &iter) {
-			if (list_connector->dpms == DRM_MODE_DPMS_ON) {
-				ret = -EBUSY;
-				break;
-			}
-		}
-
-		drm_connector_list_iter_end(&iter);
-
-		drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
-		mutex_unlock(&drm_dev->mode_config.mutex);
-	}
-
-	if (ret == -EBUSY)
-		DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
+	ret = amdgpu_runtime_idle_check_display(dev);
 
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_autosuspend(dev);
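
The refactor above replaces two copies of the display-activity check with one helper that both runtime-PM callbacks call. A minimal standalone sketch of that shape, in plain C rather than kernel code; every name here (idle_check_display, runtime_suspend, runtime_idle, the device struct) is a hypothetical stand-in:

	#include <errno.h>
	#include <stdio.h>

	struct device { int displays_connected; int crtc_active; };

	/* Shared check: report busy if powering off would disturb a display. */
	static int idle_check_display(struct device *dev)
	{
		if (dev->displays_connected || dev->crtc_active)
			return -EBUSY;
		return 0;
	}

	/* Suspend refuses outright when the check fails... */
	static int runtime_suspend(struct device *dev)
	{
		int ret = idle_check_display(dev);

		if (ret)
			return ret;
		printf("suspending\n");
		return 0;
	}

	/* ...while idle just reports the result and lets autosuspend rearm. */
	static int runtime_idle(struct device *dev)
	{
		int ret = idle_check_display(dev);

		printf("idle check: %d\n", ret);
		return ret;
	}

	int main(void)
	{
		struct device dev = { .displays_connected = 1, .crtc_active = 0 };

		runtime_idle(&dev);	/* -EBUSY: a display is connected */
		dev.displays_connected = 0;
		runtime_suspend(&dev);	/* 0: safe to power down */
		return 0;
	}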

drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

Lines changed: 10 additions & 0 deletions
@@ -1151,6 +1151,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	/*
+	 * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
+	 * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
+	 * seen any issue on the DF 3.0.2 series platform.
+	 */
+	if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+		dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+		return 0;
+	}
+
 	r = adev->mmhub.funcs->set_clockgating(adev, state);
 	if (r)
 		return r;
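
The gate above keys off a packed IP version so hardware revisions compare with ordinary integer operators. A small self-contained sketch of that idiom; the IP_VERSION packing below is an assumption chosen for illustration and may not match the kernel's exact definition:

	#include <stdio.h>

	/* Pack major/minor/revision into one comparable integer (illustrative). */
	#define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

	int main(void)
	{
		unsigned int df_version = IP_VERSION(3, 0, 3);

		/* Same shape as the gate in the hunk: skip the action on newer DF. */
		if (df_version > IP_VERSION(3, 0, 2))
			printf("DF newer than 3.0.2: keep MMHUB clock gating enabled\n");
		else
			printf("DF 3.0.2 or older: proceed with clock gating change\n");
		return 0;
	}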

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 37 additions & 46 deletions
@@ -130,19 +130,33 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
 }
 
 static void increment_queue_count(struct device_queue_manager *dqm,
-			enum kfd_queue_type type)
+			struct qcm_process_device *qpd,
+			struct queue *q)
 {
 	dqm->active_queue_count++;
-	if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
 		dqm->active_cp_queue_count++;
+
+	if (q->properties.is_gws) {
+		dqm->gws_queue_count++;
+		qpd->mapped_gws_queue = true;
+	}
 }
 
 static void decrement_queue_count(struct device_queue_manager *dqm,
-			enum kfd_queue_type type)
+			struct qcm_process_device *qpd,
+			struct queue *q)
 {
 	dqm->active_queue_count--;
-	if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
 		dqm->active_cp_queue_count--;
+
+	if (q->properties.is_gws) {
+		dqm->gws_queue_count--;
+		qpd->mapped_gws_queue = false;
+	}
 }
 
 /*
@@ -412,7 +426,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
 	if (q->properties.is_active)
-		increment_queue_count(dqm, q->properties.type);
+		increment_queue_count(dqm, qpd, q);
 
 	/*
 	 * Unconditionally increment this counter, regardless of the queue's
@@ -601,13 +615,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 		deallocate_vmid(dqm, qpd, q);
 	}
 	qpd->queue_count--;
-	if (q->properties.is_active) {
-		decrement_queue_count(dqm, q->properties.type);
-		if (q->properties.is_gws) {
-			dqm->gws_queue_count--;
-			qpd->mapped_gws_queue = false;
-		}
-	}
+	if (q->properties.is_active)
+		decrement_queue_count(dqm, qpd, q);
 
 	return retval;
 }
@@ -700,12 +709,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
 	 */
-	if (q->properties.is_active && !prev_active)
-		increment_queue_count(dqm, q->properties.type);
-	else if (!q->properties.is_active && prev_active)
-		decrement_queue_count(dqm, q->properties.type);
-
-	if (q->gws && !q->properties.is_gws) {
+	if (q->properties.is_active && !prev_active) {
+		increment_queue_count(dqm, &pdd->qpd, q);
+	} else if (!q->properties.is_active && prev_active) {
+		decrement_queue_count(dqm, &pdd->qpd, q);
+	} else if (q->gws && !q->properties.is_gws) {
 		if (q->properties.is_active) {
 			dqm->gws_queue_count++;
 			pdd->qpd.mapped_gws_queue = true;
@@ -767,11 +775,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 				q->properties.type)];
 		q->properties.is_active = false;
-		decrement_queue_count(dqm, q->properties.type);
-		if (q->properties.is_gws) {
-			dqm->gws_queue_count--;
-			qpd->mapped_gws_queue = false;
-		}
+		decrement_queue_count(dqm, qpd, q);
 
 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
 			continue;
@@ -817,7 +821,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 			continue;
 
 		q->properties.is_active = false;
-		decrement_queue_count(dqm, q->properties.type);
+		decrement_queue_count(dqm, qpd, q);
 	}
 	pdd->last_evict_timestamp = get_jiffies_64();
 	retval = execute_queues_cpsch(dqm,
@@ -888,11 +892,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 				q->properties.type)];
 		q->properties.is_active = true;
-		increment_queue_count(dqm, q->properties.type);
-		if (q->properties.is_gws) {
-			dqm->gws_queue_count++;
-			qpd->mapped_gws_queue = true;
-		}
+		increment_queue_count(dqm, qpd, q);
 
 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
 			continue;
@@ -950,7 +950,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 			continue;
 
 		q->properties.is_active = true;
-		increment_queue_count(dqm, q->properties.type);
+		increment_queue_count(dqm, &pdd->qpd, q);
 	}
 	retval = execute_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1378,7 +1378,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 			dqm->total_queue_count);
 
 	list_add(&kq->list, &qpd->priv_queue_list);
-	increment_queue_count(dqm, kq->queue->properties.type);
+	increment_queue_count(dqm, qpd, kq->queue);
 	qpd->is_debug = true;
 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 	dqm_unlock(dqm);
@@ -1392,7 +1392,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 {
 	dqm_lock(dqm);
 	list_del(&kq->list);
-	decrement_queue_count(dqm, kq->queue->properties.type);
+	decrement_queue_count(dqm, qpd, kq->queue);
 	qpd->is_debug = false;
 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
 	/*
@@ -1467,7 +1467,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	qpd->queue_count++;
 
 	if (q->properties.is_active) {
-		increment_queue_count(dqm, q->properties.type);
+		increment_queue_count(dqm, qpd, q);
 
 		execute_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1683,15 +1683,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	list_del(&q->list);
 	qpd->queue_count--;
 	if (q->properties.is_active) {
-		decrement_queue_count(dqm, q->properties.type);
+		decrement_queue_count(dqm, qpd, q);
 		retval = execute_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 		if (retval == -ETIME)
 			qpd->reset_wavefronts = true;
-		if (q->properties.is_gws) {
-			dqm->gws_queue_count--;
-			qpd->mapped_gws_queue = false;
-		}
 	}
 
 	/*
@@ -1932,7 +1928,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	/* Clean all kernel queues */
 	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
 		list_del(&kq->list);
-		decrement_queue_count(dqm, kq->queue->properties.type);
+		decrement_queue_count(dqm, qpd, kq->queue);
 		qpd->is_debug = false;
 		dqm->total_queue_count--;
 		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
@@ -1945,13 +1941,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 			deallocate_sdma_queue(dqm, q);
 
-		if (q->properties.is_active) {
-			decrement_queue_count(dqm, q->properties.type);
-			if (q->properties.is_gws) {
-				dqm->gws_queue_count--;
-				qpd->mapped_gws_queue = false;
-			}
-		}
+		if (q->properties.is_active)
+			decrement_queue_count(dqm, qpd, q);
 
 		dqm->total_queue_count--;
 	}
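
The point of this refactor is that the GWS counter and flag now live inside the same paired helpers that maintain the active-queue counts, so no call site can update one without the other. A standalone sketch of that invariant, with simplified stand-in types (dqm, qpd, queue) in place of the real kfd structures:

	#include <stdbool.h>
	#include <stdio.h>

	struct dqm   { int active_queue_count; int gws_queue_count; };
	struct qpd   { bool mapped_gws_queue; };
	struct queue { bool is_gws; };

	static void increment_queue_count(struct dqm *dqm, struct qpd *qpd,
					  struct queue *q)
	{
		dqm->active_queue_count++;
		if (q->is_gws) {	/* GWS state moves with the queue count */
			dqm->gws_queue_count++;
			qpd->mapped_gws_queue = true;
		}
	}

	static void decrement_queue_count(struct dqm *dqm, struct qpd *qpd,
					  struct queue *q)
	{
		dqm->active_queue_count--;
		if (q->is_gws) {
			dqm->gws_queue_count--;
			qpd->mapped_gws_queue = false;
		}
	}

	int main(void)
	{
		struct dqm dqm = {0};
		struct qpd qpd = {0};
		struct queue q = { .is_gws = true };

		increment_queue_count(&dqm, &qpd, &q);	/* create/restore... */
		decrement_queue_count(&dqm, &qpd, &q);	/* ...destroy/evict stay balanced */
		printf("active=%d gws=%d\n", dqm.active_queue_count, dqm.gws_queue_count);
		return 0;
	}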

drivers/gpu/drm/amd/amdkfd/kfd_priv.h

Lines changed: 1 addition & 1 deletion
@@ -1103,7 +1103,7 @@ struct kfd_criu_queue_priv_data {
 	uint32_t priority;
 	uint32_t q_percent;
 	uint32_t doorbell_id;
-	uint32_t is_gws;
+	uint32_t gws;
 	uint32_t sdma_id;
 	uint32_t eop_ring_buffer_size;
 	uint32_t ctx_save_restore_area_size;

drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

Lines changed: 7 additions & 3 deletions
@@ -636,6 +636,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd,
 	q_data->ctx_save_restore_area_size =
 		q->properties.ctx_save_restore_area_size;
 
+	q_data->gws = !!q->gws;
+
 	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
 	if (ret) {
 		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
@@ -743,7 +745,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
 				struct kfd_criu_queue_priv_data *q_data)
 {
 	qp->is_interop = false;
-	qp->is_gws = q_data->is_gws;
 	qp->queue_percent = q_data->q_percent;
 	qp->priority = q_data->priority;
 	qp->queue_address = q_data->q_address;
@@ -826,12 +827,15 @@ int kfd_criu_restore_queue(struct kfd_process *p,
 				NULL);
 	if (ret) {
 		pr_err("Failed to create new queue err:%d\n", ret);
-		ret = -EINVAL;
+		goto exit;
 	}
 
+	if (q_data->gws)
+		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
+
 exit:
 	if (ret)
-		pr_err("Failed to create queue (%d)\n", ret);
+		pr_err("Failed to restore queue (%d)\n", ret);
 	else
 		pr_debug("Queue id %d was restored successfully\n", queue_id);
 
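
Note the shape of the CRIU change above: the checkpoint records only whether GWS was attached (q_data->gws = !!q->gws), and restore reattaches it through pqm_set_gws() rather than replaying the saved property. A minimal sketch of that checkpoint/restore split; all structures and the set_gws() helper below are illustrative stand-ins, not the kfd API:

	#include <stdbool.h>
	#include <stdio.h>

	struct saved_queue { bool had_gws; };

	static void checkpoint(struct saved_queue *s, void *gws)
	{
		s->had_gws = (gws != NULL);	/* mirrors q_data->gws = !!q->gws */
	}

	static int set_gws(void)	/* stand-in for pqm_set_gws() */
	{
		printf("re-acquiring GWS through the normal allocation path\n");
		return 0;
	}

	static int restore(const struct saved_queue *s)
	{
		/* Recreate the queue first, then reattach GWS only if it was held. */
		if (s->had_gws)
			return set_gws();
		return 0;
	}

	int main(void)
	{
		struct saved_queue s;
		int dummy_gws;

		checkpoint(&s, &dummy_gws);
		return restore(&s);
	}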

drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c

Lines changed: 1 addition & 0 deletions
@@ -997,6 +997,7 @@ static struct clock_source *dcn21_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
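
The one-line fix above frees clk_src on the construct-failure path, which previously returned NULL and leaked the allocation. A minimal sketch of the ownership rule with hypothetical names: whoever allocates must free on every exit that does not hand the object to the caller.

	#include <stdlib.h>

	struct clock_source { int base; };

	static int construct(struct clock_source *cs)
	{
		(void)cs;
		return 0;	/* 0 on success; nonzero would take the error path */
	}

	static struct clock_source *clock_source_create(void)
	{
		struct clock_source *cs = calloc(1, sizeof(*cs));

		if (!cs)
			return NULL;

		if (construct(cs) == 0)
			return cs;	/* success: ownership passes to caller */

		free(cs);		/* failure: free before returning NULL */
		return NULL;
	}

	int main(void)
	{
		struct clock_source *cs = clock_source_create();

		free(cs);
		return 0;
	}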
