Skip to content

Commit 119b225

Browse files
committed
Merge tag 'amd-drm-next-6.9-2024-03-08-1' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.9-2024-03-08-1:

amdgpu:
- DCN 3.5.1 support
- Fixes for IOMMUv2 removal
- UAF fix
- Misc small fixes and cleanups
- SR-IOV fixes
- MCBP cleanup
- devcoredump update
- NBIF 6.3.1 support
- VPE 6.1.1 support

amdkfd:
- Misc fixes and cleanups
- GFX10.1 trap fixes

Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents b9511c6 + 5eabf0c commit 119b225

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+122906
-791
lines changed

drivers/gpu/drm/amd/amdgpu/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ amdgpu-y += \
9898
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
9999
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
100100
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
101-
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o
101+
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o
102102

103103
# add DF block
104104
amdgpu-y += \

drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
17821782
list_for_each_entry(file, &dev->filelist, lhead) {
17831783
struct amdgpu_fpriv *fpriv = file->driver_priv;
17841784
struct amdgpu_vm *vm = &fpriv->vm;
1785+
struct amdgpu_task_info *ti;
1786+
1787+
ti = amdgpu_vm_get_task_info_vm(vm);
1788+
if (ti) {
1789+
seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name);
1790+
amdgpu_vm_put_task_info(ti);
1791+
}
17851792

1786-
seq_printf(m, "pid:%d\tProcess:%s ----------\n",
1787-
vm->task_info.pid, vm->task_info.process_name);
17881793
r = amdgpu_bo_reserve(vm->root.bo, true);
17891794
if (r)
17901795
break;

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4056,13 +4056,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
40564056
goto unmap_memory;
40574057
}
40584058

4059+
amdgpu_device_set_mcbp(adev);
4060+
40594061
/* early init functions */
40604062
r = amdgpu_device_ip_early_init(adev);
40614063
if (r)
40624064
goto unmap_memory;
40634065

4064-
amdgpu_device_set_mcbp(adev);
4065-
40664066
/* Get rid of things like offb */
40674067
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
40684068
if (r)

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "nbio_v4_3.h"
6262
#include "nbio_v7_2.h"
6363
#include "nbio_v7_7.h"
64+
#include "nbif_v6_3_1.h"
6465
#include "hdp_v5_0.h"
6566
#include "hdp_v5_2.h"
6667
#include "hdp_v6_0.h"
@@ -1319,6 +1320,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
13191320
}
13201321
}
13211322

1323+
if (le16_to_cpu(ip->hw_id) == VPE_HWID) {
1324+
if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES)
1325+
adev->vpe.num_instances++;
1326+
else
1327+
dev_err(adev->dev, "Too many VPE instances: %d vs %d\n",
1328+
adev->vpe.num_instances + 1,
1329+
AMDGPU_MAX_VPE_INSTANCES);
1330+
}
1331+
13221332
if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
13231333
adev->gmc.num_umc++;
13241334
adev->umc.node_inst_num++;
@@ -1936,6 +1946,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
19361946
case IP_VERSION(3, 2, 0):
19371947
case IP_VERSION(3, 2, 1):
19381948
case IP_VERSION(3, 5, 0):
1949+
case IP_VERSION(3, 5, 1):
19391950
if (amdgpu_sriov_vf(adev))
19401951
amdgpu_discovery_set_sriov_display(adev);
19411952
else
@@ -2212,6 +2223,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
22122223
{
22132224
switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
22142225
case IP_VERSION(6, 1, 0):
2226+
case IP_VERSION(6, 1, 1):
22152227
amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block);
22162228
break;
22172229
default:
@@ -2558,6 +2570,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
25582570
adev->nbio.funcs = &nbio_v7_7_funcs;
25592571
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
25602572
break;
2573+
case IP_VERSION(6, 3, 1):
2574+
adev->nbio.funcs = &nbif_v6_3_1_funcs;
2575+
adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
2576+
break;
25612577
default:
25622578
break;
25632579
}

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2693,7 +2693,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
26932693
}
26942694

26952695
adev->in_runpm = true;
2696-
if (amdgpu_device_supports_px(drm_dev))
2696+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
26972697
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
26982698

26992699
/*
@@ -2703,7 +2703,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
27032703
* platforms.
27042704
* TODO: this may be also needed for PX capable platform.
27052705
*/
2706-
if (amdgpu_device_supports_boco(drm_dev))
2706+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
27072707
adev->mp1_state = PP_MP1_STATE_UNLOAD;
27082708

27092709
ret = amdgpu_device_prepare(drm_dev);
@@ -2712,15 +2712,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
27122712
ret = amdgpu_device_suspend(drm_dev, false);
27132713
if (ret) {
27142714
adev->in_runpm = false;
2715-
if (amdgpu_device_supports_boco(drm_dev))
2715+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
27162716
adev->mp1_state = PP_MP1_STATE_NONE;
27172717
return ret;
27182718
}
27192719

2720-
if (amdgpu_device_supports_boco(drm_dev))
2720+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
27212721
adev->mp1_state = PP_MP1_STATE_NONE;
27222722

2723-
if (amdgpu_device_supports_px(drm_dev)) {
2723+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
27242724
/* Only need to handle PCI state in the driver for ATPX
27252725
* PCI core handles it for _PR3.
27262726
*/
@@ -2729,9 +2729,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
27292729
pci_ignore_hotplug(pdev);
27302730
pci_set_power_state(pdev, PCI_D3cold);
27312731
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
2732-
} else if (amdgpu_device_supports_boco(drm_dev)) {
2732+
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
27332733
/* nothing to do */
2734-
} else if (amdgpu_device_supports_baco(drm_dev)) {
2734+
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
27352735
amdgpu_device_baco_enter(drm_dev);
27362736
}
27372737

@@ -2754,7 +2754,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
27542754
if (!pci_device_is_present(adev->pdev))
27552755
adev->no_hw_access = true;
27562756

2757-
if (amdgpu_device_supports_px(drm_dev)) {
2757+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
27582758
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
27592759

27602760
/* Only need to handle PCI state in the driver for ATPX
@@ -2766,22 +2766,22 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
27662766
if (ret)
27672767
return ret;
27682768
pci_set_master(pdev);
2769-
} else if (amdgpu_device_supports_boco(drm_dev)) {
2769+
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
27702770
/* Only need to handle PCI state in the driver for ATPX
27712771
* PCI core handles it for _PR3.
27722772
*/
27732773
pci_set_master(pdev);
2774-
} else if (amdgpu_device_supports_baco(drm_dev)) {
2774+
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
27752775
amdgpu_device_baco_exit(drm_dev);
27762776
}
27772777
ret = amdgpu_device_resume(drm_dev, false);
27782778
if (ret) {
2779-
if (amdgpu_device_supports_px(drm_dev))
2779+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
27802780
pci_disable_device(pdev);
27812781
return ret;
27822782
}
27832783

2784-
if (amdgpu_device_supports_px(drm_dev))
2784+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
27852785
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
27862786
adev->in_runpm = false;
27872787
return 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
208208
if (!WARN_ON(!vm->process_info->eviction_fence)) {
209209
r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
210210
&vm->process_info->eviction_fence->base);
211-
if (r)
212-
dev_warn(adev->dev, "%d: validate_and_fence failed: %d\n",
213-
vm->task_info.pid, r);
211+
if (r) {
212+
struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
213+
214+
dev_warn(adev->dev, "validate_and_fence failed: %d\n", r);
215+
if (ti) {
216+
dev_warn(adev->dev, "pid %d\n", ti->pid);
217+
amdgpu_vm_put_task_info(ti);
218+
}
219+
}
214220
}
215221
mutex_unlock(&vm->process_info->lock);
216222

drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -131,18 +131,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
131131
struct amdgpu_ib *ib = &ibs[0];
132132
struct dma_fence *tmp = NULL;
133133
bool need_ctx_switch;
134-
unsigned int patch_offset = ~0;
135134
struct amdgpu_vm *vm;
136135
uint64_t fence_ctx;
137136
uint32_t status = 0, alloc_size;
138137
unsigned int fence_flags = 0;
139138
bool secure, init_shadow;
140139
u64 shadow_va, csa_va, gds_va;
141140
int vmid = AMDGPU_JOB_GET_VMID(job);
141+
bool need_pipe_sync = false;
142+
unsigned int cond_exec;
142143

143144
unsigned int i;
144145
int r = 0;
145-
bool need_pipe_sync = false;
146146

147147
if (num_ibs == 0)
148148
return -EINVAL;
@@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
228228
init_shadow, vmid);
229229

230230
if (ring->funcs->init_cond_exec)
231-
patch_offset = amdgpu_ring_init_cond_exec(ring);
231+
cond_exec = amdgpu_ring_init_cond_exec(ring,
232+
ring->cond_exe_gpu_addr);
232233

233234
amdgpu_device_flush_hdp(adev, ring);
234235

@@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
278279
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
279280
}
280281

281-
if (ring->funcs->emit_gfx_shadow) {
282+
if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
282283
amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
283-
284-
if (ring->funcs->init_cond_exec) {
285-
unsigned int ce_offset = ~0;
286-
287-
ce_offset = amdgpu_ring_init_cond_exec(ring);
288-
if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
289-
amdgpu_ring_patch_cond_exec(ring, ce_offset);
290-
}
284+
amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
291285
}
292286

293287
r = amdgpu_fence_emit(ring, f, job, fence_flags);
@@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
302296
if (ring->funcs->insert_end)
303297
ring->funcs->insert_end(ring);
304298

305-
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
306-
amdgpu_ring_patch_cond_exec(ring, patch_offset);
299+
amdgpu_ring_patch_cond_exec(ring, cond_exec);
307300

308301
ring->current_ctx = fence_ctx;
309302
if (vm && ring->funcs->emit_switch_buffer)

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
3535
{
3636
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
3737
struct amdgpu_job *job = to_amdgpu_job(s_job);
38-
struct amdgpu_task_info ti;
38+
struct amdgpu_task_info *ti;
3939
struct amdgpu_device *adev = ring->adev;
4040
int idx;
4141
int r;
@@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
4848
return DRM_GPU_SCHED_STAT_ENODEV;
4949
}
5050

51-
memset(&ti, 0, sizeof(struct amdgpu_task_info));
51+
5252
adev->job_hang = true;
5353

5454
if (amdgpu_gpu_recovery &&
@@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
5858
goto exit;
5959
}
6060

61-
amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
6261
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
63-
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
64-
ring->fence_drv.sync_seq);
65-
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
66-
ti.process_name, ti.tgid, ti.task_name, ti.pid);
62+
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
63+
ring->fence_drv.sync_seq);
64+
65+
ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
66+
if (ti) {
67+
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
68+
ti->process_name, ti->tgid, ti->task_name, ti->pid);
69+
amdgpu_vm_put_task_info(ti);
70+
}
6771

6872
dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
6973

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,13 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
196196
coredump->reset_task_info.process_name,
197197
coredump->reset_task_info.pid);
198198

199+
if (coredump->ring) {
200+
drm_printf(&p, "\nRing timed out details\n");
201+
drm_printf(&p, "IP Type: %d Ring Name: %s\n",
202+
coredump->ring->funcs->type,
203+
coredump->ring->name);
204+
}
205+
199206
if (coredump->reset_vram_lost)
200207
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
201208
if (coredump->adev->reset_info.num_regs) {
@@ -220,6 +227,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
220227
{
221228
struct amdgpu_coredump_info *coredump;
222229
struct drm_device *dev = adev_to_drm(adev);
230+
struct amdgpu_job *job = reset_context->job;
231+
struct drm_sched_job *s_job;
223232

224233
coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
225234

@@ -230,8 +239,21 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
230239

231240
coredump->reset_vram_lost = vram_lost;
232241

233-
if (reset_context->job && reset_context->job->vm)
234-
coredump->reset_task_info = reset_context->job->vm->task_info;
242+
if (reset_context->job && reset_context->job->vm) {
243+
struct amdgpu_task_info *ti;
244+
struct amdgpu_vm *vm = reset_context->job->vm;
245+
246+
ti = amdgpu_vm_get_task_info_vm(vm);
247+
if (ti) {
248+
coredump->reset_task_info = *ti;
249+
amdgpu_vm_put_task_info(ti);
250+
}
251+
}
252+
253+
if (job) {
254+
s_job = &job->base;
255+
coredump->ring = to_amdgpu_ring(s_job->sched);
256+
}
235257

236258
coredump->adev = adev;
237259

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ struct amdgpu_coredump_info {
9797
struct amdgpu_task_info reset_task_info;
9898
struct timespec64 reset_time;
9999
bool reset_vram_lost;
100+
struct amdgpu_ring *ring;
100101
};
101102
#endif
102103

0 commit comments

Comments
 (0)