Skip to content

Commit 5c03e58

Browse files
Feifei Xu authored and alexdeucher committed
drm/amdgpu:add smu mode1/2 support for aldebaran
Use MSG_GfxDriverReset for mode reset and retire MSG_Mode1Reset. Centralize soc15_asic_mode1_reset() and nv_asic_mode1_reset() functions. Add mode2_reset_is_support() for smu->ppt_funcs.
Signed-off-by: Feifei Xu <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 4c2e5f5 commit 5c03e58

File tree

11 files changed

+164
-82
lines changed

11 files changed

+164
-82
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,6 +1261,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
12611261
const u32 array_size);
12621262

12631263
bool amdgpu_device_supports_atpx(struct drm_device *dev);
1264+
int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
12641265
bool amdgpu_device_supports_boco(struct drm_device *dev);
12651266
bool amdgpu_device_supports_baco(struct drm_device *dev);
12661267
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4248,6 +4248,45 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
42484248
return false;
42494249
}
42504250

4251+
int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4252+
{
4253+
u32 i;
4254+
int ret = 0;
4255+
4256+
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4257+
4258+
dev_info(adev->dev, "GPU mode1 reset\n");
4259+
4260+
/* disable BM */
4261+
pci_clear_master(adev->pdev);
4262+
4263+
amdgpu_device_cache_pci_state(adev->pdev);
4264+
4265+
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4266+
dev_info(adev->dev, "GPU smu mode1 reset\n");
4267+
ret = amdgpu_dpm_mode1_reset(adev);
4268+
} else {
4269+
dev_info(adev->dev, "GPU psp mode1 reset\n");
4270+
ret = psp_gpu_reset(adev);
4271+
}
4272+
4273+
if (ret)
4274+
dev_err(adev->dev, "GPU mode1 reset failed\n");
4275+
4276+
amdgpu_device_load_pci_state(adev->pdev);
4277+
4278+
/* wait for asic to come out of reset */
4279+
for (i = 0; i < adev->usec_timeout; i++) {
4280+
u32 memsize = adev->nbio.funcs->get_memsize(adev);
4281+
4282+
if (memsize != 0xffffffff)
4283+
break;
4284+
udelay(1);
4285+
}
4286+
4287+
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4288+
return ret;
4289+
}
42514290

42524291
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
42534292
struct amdgpu_job *job,

drivers/gpu/drm/amd/amdgpu/nv.c

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -484,44 +484,6 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
484484
return -EINVAL;
485485
}
486486

487-
static int nv_asic_mode1_reset(struct amdgpu_device *adev)
488-
{
489-
u32 i;
490-
int ret = 0;
491-
492-
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
493-
494-
/* disable BM */
495-
pci_clear_master(adev->pdev);
496-
497-
amdgpu_device_cache_pci_state(adev->pdev);
498-
499-
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
500-
dev_info(adev->dev, "GPU smu mode1 reset\n");
501-
ret = amdgpu_dpm_mode1_reset(adev);
502-
} else {
503-
dev_info(adev->dev, "GPU psp mode1 reset\n");
504-
ret = psp_gpu_reset(adev);
505-
}
506-
507-
if (ret)
508-
dev_err(adev->dev, "GPU mode1 reset failed\n");
509-
amdgpu_device_load_pci_state(adev->pdev);
510-
511-
/* wait for asic to come out of reset */
512-
for (i = 0; i < adev->usec_timeout; i++) {
513-
u32 memsize = adev->nbio.funcs->get_memsize(adev);
514-
515-
if (memsize != 0xffffffff)
516-
break;
517-
udelay(1);
518-
}
519-
520-
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
521-
522-
return ret;
523-
}
524-
525487
static int nv_asic_mode2_reset(struct amdgpu_device *adev)
526488
{
527489
u32 i;
@@ -624,7 +586,7 @@ static int nv_asic_reset(struct amdgpu_device *adev)
624586
break;
625587
default:
626588
dev_info(adev->dev, "MODE1 reset\n");
627-
ret = nv_asic_mode1_reset(adev);
589+
ret = amdgpu_device_mode1_reset(adev);
628590
break;
629591
}
630592

drivers/gpu/drm/amd/amdgpu/soc15.c

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -650,40 +650,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
650650

651651
}
652652

653-
static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
654-
{
655-
u32 i;
656-
int ret = 0;
657-
658-
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
659-
660-
dev_info(adev->dev, "GPU mode1 reset\n");
661-
662-
/* disable BM */
663-
pci_clear_master(adev->pdev);
664-
665-
amdgpu_device_cache_pci_state(adev->pdev);
666-
667-
ret = psp_gpu_reset(adev);
668-
if (ret)
669-
dev_err(adev->dev, "GPU mode1 reset failed\n");
670-
671-
amdgpu_device_load_pci_state(adev->pdev);
672-
673-
/* wait for asic to come out of reset */
674-
for (i = 0; i < adev->usec_timeout; i++) {
675-
u32 memsize = adev->nbio.funcs->get_memsize(adev);
676-
677-
if (memsize != 0xffffffff)
678-
break;
679-
udelay(1);
680-
}
681-
682-
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
683-
684-
return ret;
685-
}
686-
687653
static int soc15_asic_baco_reset(struct amdgpu_device *adev)
688654
{
689655
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -708,13 +674,21 @@ static enum amd_reset_method
708674
soc15_asic_reset_method(struct amdgpu_device *adev)
709675
{
710676
bool baco_reset = false;
677+
bool connected_to_cpu = false;
711678
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
712679

680+
if (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)
681+
connected_to_cpu = true;
682+
713683
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
714684
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
715685
amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
716-
amdgpu_reset_method == AMD_RESET_METHOD_PCI)
717-
return amdgpu_reset_method;
686+
amdgpu_reset_method == AMD_RESET_METHOD_PCI) {
687+
/* If connected to cpu, driver only support mode2 */
688+
if (connected_to_cpu)
689+
return AMD_RESET_METHOD_MODE2;
690+
return amdgpu_reset_method;
691+
}
718692

719693
if (amdgpu_reset_method != -1)
720694
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
@@ -740,6 +714,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
740714
if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
741715
baco_reset = false;
742716
break;
717+
case CHIP_ALDEBARAN:
718+
/*
719+
* 1.connected to cpu: driver issue mode2 reset
720+
* 2.discrete gpu: driver issue mode1 reset
721+
*/
722+
if (connected_to_cpu)
723+
return AMD_RESET_METHOD_MODE2;
724+
break;
743725
default:
744726
break;
745727
}
@@ -769,7 +751,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
769751
return amdgpu_dpm_mode2_reset(adev);
770752
default:
771753
dev_info(adev->dev, "MODE1 reset\n");
772-
return soc15_asic_mode1_reset(adev);
754+
return amdgpu_device_mode1_reset(adev);
773755
}
774756
}
775757

drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
// Message Definitions:
3737
#define PPSMC_MSG_TestMessage 0x1
3838
#define PPSMC_MSG_GetSmuVersion 0x2
39-
#define PPSMC_MSG_Mode1Reset 0x3
39+
#define PPSMC_MSG_GfxDriverReset 0x3
4040
#define PPSMC_MSG_GetDriverIfVersion 0x4
4141
#define PPSMC_MSG_spare1 0x5
4242
#define PPSMC_MSG_spare2 0x6
@@ -70,8 +70,8 @@
7070
#define PPSMC_MSG_SetPptLimit 0x22
7171
#define PPSMC_MSG_GetPptLimit 0x23
7272
#define PPSMC_MSG_PrepareMp1ForUnload 0x24
73-
#define PPSMC_MSG_PrepareMp1ForReset 0x25
74-
#define PPSMC_MSG_SoftReset 0x26
73+
#define PPSMC_MSG_PrepareMp1ForReset 0x25 //retired in 68.07
74+
#define PPSMC_MSG_SoftReset 0x26 //retired in 68.07
7575
#define PPSMC_MSG_RunDcBtc 0x27
7676
#define PPSMC_MSG_DramLogSetDramAddrHigh 0x28
7777
#define PPSMC_MSG_DramLogSetDramAddrLow 0x29
@@ -92,7 +92,24 @@
9292
#define PPSMC_MSG_DisableDeterminism 0x3A
9393
#define PPSMC_MSG_SetUclkDpmMode 0x3B
9494

95-
#define PPSMC_Message_Count 0x3C
95+
//STB to dram log
96+
#define PPSMC_MSG_DumpSTBtoDram 0x3C
97+
#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3D
98+
#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3E
99+
#define PPSMC_MSG_STBtoDramLogSetDramSize 0x3F
100+
#define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrHigh 0x40
101+
#define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41
102+
103+
#define PPSMC_Message_Count 0x42
104+
105+
//PPSMC Reset Types
106+
#define PPSMC_RESET_TYPE_WARM_RESET 0x00
107+
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x01 //driver msg argument should be 1 for mode-1
108+
#define PPSMC_RESET_TYPE_DRIVER_MODE_2_RESET 0x02 //and 2 for mode-2
109+
#define PPSMC_RESET_TYPE_PCIE_LINK_RESET 0x03
110+
#define PPSMC_RESET_TYPE_BIF_LINK_RESET 0x04
111+
#define PPSMC_RESET_TYPE_PF0_FLR_RESET 0x05
112+
96113

97114
typedef enum {
98115
GFXOFF_ERROR_NO_ERROR,

drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,10 @@ struct pptable_funcs {
10441044
* @mode1_reset_is_support: Check if GPU supports mode1 reset.
10451045
*/
10461046
bool (*mode1_reset_is_support)(struct smu_context *smu);
1047+
/**
1048+
* @mode2_reset_is_support: Check if GPU supports mode2 reset.
1049+
*/
1050+
bool (*mode2_reset_is_support)(struct smu_context *smu);
10471051

10481052
/**
10491053
* @mode1_reset: Perform mode1 reset.
@@ -1279,6 +1283,7 @@ int smu_baco_set_state(void *handle, int state);
12791283

12801284

12811285
bool smu_mode1_reset_is_support(struct smu_context *smu);
1286+
bool smu_mode2_reset_is_support(struct smu_context *smu);
12821287
int smu_mode1_reset(struct smu_context *smu);
12831288
int smu_mode2_reset(void *handle);
12841289

drivers/gpu/drm/amd/pm/inc/smu_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@
184184
__SMU_DUMMY_MAP(GET_UMC_FW_WA), \
185185
__SMU_DUMMY_MAP(Mode1Reset), \
186186
__SMU_DUMMY_MAP(RlcPowerNotify), \
187+
__SMU_DUMMY_MAP(GfxDriverReset), \
187188
__SMU_DUMMY_MAP(SetHardMinIspiclkByFreq), \
188189
__SMU_DUMMY_MAP(SetHardMinIspxclkByFreq), \
189190
__SMU_DUMMY_MAP(SetSoftMinSocclkByFreq), \

drivers/gpu/drm/amd/pm/inc/smu_v13_0.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ int smu_v13_0_baco_enter(struct smu_context *smu);
220220
int smu_v13_0_baco_exit(struct smu_context *smu);
221221

222222
int smu_v13_0_mode1_reset(struct smu_context *smu);
223+
int smu_v13_0_mode2_reset(struct smu_context *smu);
223224

224225
int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
225226
uint32_t *min, uint32_t *max);

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,6 +1917,9 @@ int smu_set_mp1_state(void *handle,
19171917
msg = SMU_MSG_PrepareMp1ForUnload;
19181918
break;
19191919
case PP_MP1_STATE_RESET:
1920+
/*TODO: since the SMU_MSG_PrepareMp1ForReset is retired in Aldebaran
1921+
* Add handling here for Aldebaran.
1922+
*/
19201923
msg = SMU_MSG_PrepareMp1ForReset;
19211924
break;
19221925
case PP_MP1_STATE_NONE:
@@ -2788,6 +2791,23 @@ bool smu_mode1_reset_is_support(struct smu_context *smu)
27882791
return ret;
27892792
}
27902793

2794+
bool smu_mode2_reset_is_support(struct smu_context *smu)
2795+
{
2796+
bool ret = false;
2797+
2798+
if (!smu->pm_enabled)
2799+
return false;
2800+
2801+
mutex_lock(&smu->mutex);
2802+
2803+
if (smu->ppt_funcs && smu->ppt_funcs->mode2_reset_is_support)
2804+
ret = smu->ppt_funcs->mode2_reset_is_support(smu);
2805+
2806+
mutex_unlock(&smu->mutex);
2807+
2808+
return ret;
2809+
}
2810+
27912811
int smu_mode1_reset(struct smu_context *smu)
27922812
{
27932813
int ret = 0;

drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <linux/pci.h>
4646
#include "amdgpu_ras.h"
4747
#include "smu_cmn.h"
48+
#include "mp/mp_13_0_2_offset.h"
4849

4950
/*
5051
* DO NOT use these for err/warn/info/debug messages.
@@ -108,7 +109,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
108109
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
109110
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
110111
MSG_MAP(PrepareMp1ForReset, PPSMC_MSG_PrepareMp1ForReset, 0),
111-
MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),
112+
MSG_MAP(GfxDriverReset, PPSMC_MSG_GfxDriverReset, 0),
112113
MSG_MAP(SoftReset, PPSMC_MSG_SoftReset, 0),
113114
MSG_MAP(RunDcBtc, PPSMC_MSG_RunDcBtc, 0),
114115
MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
@@ -1250,6 +1251,31 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
12501251
return sizeof(struct gpu_metrics_v1_0);
12511252
}
12521253

1254+
static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu)
1255+
{
1256+
struct amdgpu_device *adev = smu->adev;
1257+
u32 smu_version;
1258+
uint32_t val;
1259+
/**
1260+
* PM FW version support mode1 reset from 68.07
1261+
*/
1262+
smu_cmn_get_smc_version(smu, NULL, &smu_version);
1263+
if ((smu_version < 0x00440700))
1264+
return false;
1265+
/**
1266+
* mode1 reset relies on PSP, so we should check if
1267+
* PSP is alive.
1268+
*/
1269+
val = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
1270+
1271+
return val != 0x0;
1272+
}
1273+
1274+
static bool aldebaran_is_mode2_reset_supported(struct smu_context *smu)
1275+
{
1276+
return true;
1277+
}
1278+
12531279
static const struct pptable_funcs aldebaran_ppt_funcs = {
12541280
/* init dpm */
12551281
.get_allowed_feature_mask = aldebaran_get_allowed_feature_mask,
@@ -1305,6 +1331,10 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
13051331
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
13061332
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
13071333
.get_gpu_metrics = aldebaran_get_gpu_metrics,
1334+
.mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
1335+
.mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
1336+
.mode1_reset = smu_v13_0_mode1_reset,
1337+
.mode2_reset = smu_v13_0_mode2_reset,
13081338
};
13091339

13101340
void aldebaran_set_ppt_funcs(struct smu_context *smu)

0 commit comments

Comments
 (0)