Skip to content

Commit 474dadb

Browse files
Sharat Masetty authored and Rob Clark committed
drm/msm/a6xx: Add support for using system cache(LLC)
The last level system cache can be partitioned into 32 different slices, of which the GPU has two slices preallocated. One slice is used for caching GPU buffers and the other slice is used for caching the GPU SMMU pagetables. This change talks to the core system cache driver to acquire the slice handles, configures the SCIDs for those slices, and activates and deactivates the slices upon GPU power collapse and restore. Some support from the IOMMU driver is also needed to make use of the system cache and to set the right TCR attributes. The GPU then has the ability to override a few cacheability parameters, which it uses to override write-allocate with write-no-allocate, as the GPU hardware does not benefit much from it. DOMAIN_ATTR_IO_PGTABLE_CFG is another domain-level attribute used by the IOMMU driver for pagetable configuration; it will be used to set a quirk that initially sets the right attributes to cache the hardware pagetables into the system cache. Signed-off-by: Sharat Masetty <[email protected]> [saiprakash.ranjan: fix to set attr before device attach to iommu and rebase] Signed-off-by: Sai Prakash Ranjan <[email protected]> Signed-off-by: Rob Clark <[email protected]>
1 parent 40a72b0 commit 474dadb

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

drivers/gpu/drm/msm/adreno/a6xx_gpu.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
#include "a6xx_gpu.h"
99
#include "a6xx_gmu.xml.h"
1010

11+
#include <linux/bitfield.h>
1112
#include <linux/devfreq.h>
13+
#include <linux/soc/qcom/llcc-qcom.h>
1214

1315
#define GPU_PAS_ID 13
1416

@@ -1020,6 +1022,79 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
10201022
return IRQ_HANDLED;
10211023
}
10221024

1025+
/*
 * Read-modify-write a register in the CX_MISC (LLC control) block.
 * @reg is a dword-index register offset, hence the << 2 byte conversion.
 */
static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
{
	/*
	 * msm_rmw() returns void; returning its "value" from a void
	 * function is an ISO C constraint violation, so call it plainly.
	 */
	msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
}
1029+
1030+
/*
 * Write @value to a register in the CX_MISC (LLC control) block.
 * @reg is a dword-index register offset, hence the << 2 byte conversion.
 */
static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
{
	/*
	 * msm_writel() returns void; returning its "value" from a void
	 * function is an ISO C constraint violation, so call it plainly.
	 */
	msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
}
1034+
1035+
static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1036+
{
1037+
llcc_slice_deactivate(a6xx_gpu->llc_slice);
1038+
llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1039+
}
1040+
1041+
static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1042+
{
1043+
u32 cntl1_regval = 0;
1044+
1045+
if (IS_ERR(a6xx_gpu->llc_mmio))
1046+
return;
1047+
1048+
if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1049+
u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1050+
1051+
gpu_scid &= 0x1f;
1052+
cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1053+
(gpu_scid << 15) | (gpu_scid << 20);
1054+
}
1055+
1056+
if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1057+
u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1058+
1059+
gpuhtw_scid &= 0x1f;
1060+
cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1061+
}
1062+
1063+
if (cntl1_regval) {
1064+
/*
1065+
* Program the slice IDs for the various GPU blocks and GPU MMU
1066+
* pagetables
1067+
*/
1068+
a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1069+
1070+
/*
1071+
* Program cacheability overrides to not allocate cache lines on
1072+
* a write miss
1073+
*/
1074+
a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1075+
}
1076+
}
1077+
1078+
static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1079+
{
1080+
llcc_slice_putd(a6xx_gpu->llc_slice);
1081+
llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1082+
}
1083+
1084+
static void a6xx_llc_slices_init(struct platform_device *pdev,
1085+
struct a6xx_gpu *a6xx_gpu)
1086+
{
1087+
a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
1088+
if (IS_ERR(a6xx_gpu->llc_mmio))
1089+
return;
1090+
1091+
a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1092+
a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1093+
1094+
if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
1095+
a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1096+
}
1097+
10231098
static int a6xx_pm_resume(struct msm_gpu *gpu)
10241099
{
10251100
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -1036,6 +1111,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
10361111

10371112
msm_gpu_resume_devfreq(gpu);
10381113

1114+
a6xx_llc_activate(a6xx_gpu);
1115+
10391116
return 0;
10401117
}
10411118

@@ -1047,6 +1124,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
10471124

10481125
trace_msm_gpu_suspend(0);
10491126

1127+
a6xx_llc_deactivate(a6xx_gpu);
1128+
10501129
devfreq_suspend_device(gpu->devfreq.devfreq);
10511130

10521131
ret = a6xx_gmu_stop(a6xx_gpu);
@@ -1098,6 +1177,8 @@ static void a6xx_destroy(struct msm_gpu *gpu)
10981177
drm_gem_object_put(a6xx_gpu->shadow_bo);
10991178
}
11001179

1180+
a6xx_llc_slices_destroy(a6xx_gpu);
1181+
11011182
a6xx_gmu_remove(a6xx_gpu);
11021183

11031184
adreno_gpu_cleanup(adreno_gpu);
@@ -1216,6 +1297,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
12161297
if (info && info->revn == 650)
12171298
adreno_gpu->base.hw_apriv = true;
12181299

1300+
a6xx_llc_slices_init(pdev, a6xx_gpu);
1301+
12191302
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
12201303
if (ret) {
12211304
a6xx_destroy(&(a6xx_gpu->base.base));

drivers/gpu/drm/msm/adreno/a6xx_gpu.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ struct a6xx_gpu {
2828
uint32_t *shadow;
2929

3030
bool has_whereami;
31+
32+
void __iomem *llc_mmio;
33+
void *llc_slice;
34+
void *htw_llc_slice;
3135
};
3236

3337
#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)

drivers/gpu/drm/msm/adreno/adreno_gpu.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/soc/qcom/mdt_loader.h>
1717
#include <soc/qcom/ocmem.h>
1818
#include "adreno_gpu.h"
19+
#include "a6xx_gpu.h"
1920
#include "msm_gem.h"
2021
#include "msm_mmu.h"
2122

@@ -189,6 +190,9 @@ struct msm_gem_address_space *
189190
adreno_iommu_create_address_space(struct msm_gpu *gpu,
190191
struct platform_device *pdev)
191192
{
193+
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
194+
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
195+
struct io_pgtable_domain_attr pgtbl_cfg;
192196
struct iommu_domain *iommu;
193197
struct msm_mmu *mmu;
194198
struct msm_gem_address_space *aspace;
@@ -198,7 +202,20 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
198202
if (!iommu)
199203
return NULL;
200204

205+
/*
206+
* This allows GPU to set the bus attributes required to use system
207+
* cache on behalf of the iommu page table walker.
208+
*/
209+
if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
210+
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
211+
iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
212+
}
213+
201214
mmu = msm_iommu_new(&pdev->dev, iommu);
215+
if (IS_ERR(mmu)) {
216+
iommu_domain_free(iommu);
217+
return ERR_CAST(mmu);
218+
}
202219

203220
/*
204221
* Use the aperture start or SZ_16M, whichever is greater. This will

0 commit comments

Comments
 (0)