Skip to content

Commit 48c0c90

Browse files
alexdeucher authored and airlied committed
drm/radeon/kms: add support for CP setup on SI
Signed-off-by: Alex Deucher <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
1 parent 8b074dd commit 48c0c90

File tree

5 files changed

+607
-1
lines changed

5 files changed

+607
-1
lines changed

drivers/gpu/drm/radeon/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
7171
r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
7272
evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
7373
radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o \
74-
radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o
74+
radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o si_blit_shaders.o
7575

7676
radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
7777
radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o

drivers/gpu/drm/radeon/si.c

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "radeon_drm.h"
3232
#include "sid.h"
3333
#include "atom.h"
34+
#include "si_blit_shaders.h"
3435

3536
#define SI_PFP_UCODE_SIZE 2144
3637
#define SI_PM4_UCODE_SIZE 2144
@@ -1861,6 +1862,272 @@ static void si_gpu_init(struct radeon_device *rdev)
18611862
udelay(50);
18621863
}
18631864

1865+
/*
1866+
* CP.
1867+
*/
1868+
/*
 * si_cp_enable - halt or un-halt the command processor.
 *
 * @rdev:   radeon device
 * @enable: true to clear CP_ME_CNTL (no halt bits set), false to halt
 *
 * When disabling, the active VRAM size is first set back to the visible
 * VRAM size, then the ME, PFP and CE halt bits are written to CP_ME_CNTL
 * and SCRATCH_UMSK is cleared.  Either path ends with a 50us delay.
 */
static void si_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (!enable) {
		/* restrict TTM to the visible VRAM before stopping the CP */
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		/* halt all three CP micro engines */
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		WREG32(SCRATCH_UMSK, 0);
	} else {
		/* no halt bits set */
		WREG32(CP_ME_CNTL, 0);
	}

	/* NOTE(review): presumably lets the CP state change settle - confirm */
	udelay(50);
}
1879+
1880+
static int si_cp_load_microcode(struct radeon_device *rdev)
1881+
{
1882+
const __be32 *fw_data;
1883+
int i;
1884+
1885+
if (!rdev->me_fw || !rdev->pfp_fw)
1886+
return -EINVAL;
1887+
1888+
si_cp_enable(rdev, false);
1889+
1890+
/* PFP */
1891+
fw_data = (const __be32 *)rdev->pfp_fw->data;
1892+
WREG32(CP_PFP_UCODE_ADDR, 0);
1893+
for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1894+
WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1895+
WREG32(CP_PFP_UCODE_ADDR, 0);
1896+
1897+
/* CE */
1898+
fw_data = (const __be32 *)rdev->ce_fw->data;
1899+
WREG32(CP_CE_UCODE_ADDR, 0);
1900+
for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1901+
WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1902+
WREG32(CP_CE_UCODE_ADDR, 0);
1903+
1904+
/* ME */
1905+
fw_data = (const __be32 *)rdev->me_fw->data;
1906+
WREG32(CP_ME_RAM_WADDR, 0);
1907+
for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1908+
WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1909+
WREG32(CP_ME_RAM_WADDR, 0);
1910+
1911+
WREG32(CP_PFP_UCODE_ADDR, 0);
1912+
WREG32(CP_CE_UCODE_ADDR, 0);
1913+
WREG32(CP_ME_RAM_WADDR, 0);
1914+
WREG32(CP_ME_RAM_RADDR, 0);
1915+
return 0;
1916+
}
1917+
1918+
static int si_cp_start(struct radeon_device *rdev)
1919+
{
1920+
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1921+
int r, i;
1922+
1923+
r = radeon_ring_lock(rdev, ring, 7 + 4);
1924+
if (r) {
1925+
DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1926+
return r;
1927+
}
1928+
/* init the CP */
1929+
radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1930+
radeon_ring_write(ring, 0x1);
1931+
radeon_ring_write(ring, 0x0);
1932+
radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1933+
radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1934+
radeon_ring_write(ring, 0);
1935+
radeon_ring_write(ring, 0);
1936+
1937+
/* init the CE partitions */
1938+
radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1939+
radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1940+
radeon_ring_write(ring, 0xc000);
1941+
radeon_ring_write(ring, 0xe000);
1942+
radeon_ring_unlock_commit(rdev, ring);
1943+
1944+
si_cp_enable(rdev, true);
1945+
1946+
r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1947+
if (r) {
1948+
DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1949+
return r;
1950+
}
1951+
1952+
/* setup clear context state */
1953+
radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1954+
radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1955+
1956+
for (i = 0; i < si_default_size; i++)
1957+
radeon_ring_write(ring, si_default_state[i]);
1958+
1959+
radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1960+
radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1961+
1962+
/* set clear context state */
1963+
radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1964+
radeon_ring_write(ring, 0);
1965+
1966+
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1967+
radeon_ring_write(ring, 0x00000316);
1968+
radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1969+
radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1970+
1971+
radeon_ring_unlock_commit(rdev, ring);
1972+
1973+
for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
1974+
ring = &rdev->ring[i];
1975+
r = radeon_ring_lock(rdev, ring, 2);
1976+
1977+
/* clear the compute context state */
1978+
radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
1979+
radeon_ring_write(ring, 0);
1980+
1981+
radeon_ring_unlock_commit(rdev, ring);
1982+
}
1983+
1984+
return 0;
1985+
}
1986+
1987+
static void si_cp_fini(struct radeon_device *rdev)
1988+
{
1989+
si_cp_enable(rdev, false);
1990+
radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1991+
radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
1992+
radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
1993+
}
1994+
1995+
static int si_cp_resume(struct radeon_device *rdev)
1996+
{
1997+
struct radeon_ring *ring;
1998+
u32 tmp;
1999+
u32 rb_bufsz;
2000+
int r;
2001+
2002+
/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
2003+
WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
2004+
SOFT_RESET_PA |
2005+
SOFT_RESET_VGT |
2006+
SOFT_RESET_SPI |
2007+
SOFT_RESET_SX));
2008+
RREG32(GRBM_SOFT_RESET);
2009+
mdelay(15);
2010+
WREG32(GRBM_SOFT_RESET, 0);
2011+
RREG32(GRBM_SOFT_RESET);
2012+
2013+
WREG32(CP_SEM_WAIT_TIMER, 0x0);
2014+
WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2015+
2016+
/* Set the write pointer delay */
2017+
WREG32(CP_RB_WPTR_DELAY, 0);
2018+
2019+
WREG32(CP_DEBUG, 0);
2020+
WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2021+
2022+
/* ring 0 - compute and gfx */
2023+
/* Set ring buffer size */
2024+
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2025+
rb_bufsz = drm_order(ring->ring_size / 8);
2026+
tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2027+
#ifdef __BIG_ENDIAN
2028+
tmp |= BUF_SWAP_32BIT;
2029+
#endif
2030+
WREG32(CP_RB0_CNTL, tmp);
2031+
2032+
/* Initialize the ring buffer's read and write pointers */
2033+
WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2034+
ring->wptr = 0;
2035+
WREG32(CP_RB0_WPTR, ring->wptr);
2036+
2037+
/* set the wb address wether it's enabled or not */
2038+
WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2039+
WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2040+
2041+
if (rdev->wb.enabled)
2042+
WREG32(SCRATCH_UMSK, 0xff);
2043+
else {
2044+
tmp |= RB_NO_UPDATE;
2045+
WREG32(SCRATCH_UMSK, 0);
2046+
}
2047+
2048+
mdelay(1);
2049+
WREG32(CP_RB0_CNTL, tmp);
2050+
2051+
WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2052+
2053+
ring->rptr = RREG32(CP_RB0_RPTR);
2054+
2055+
/* ring1 - compute only */
2056+
/* Set ring buffer size */
2057+
ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2058+
rb_bufsz = drm_order(ring->ring_size / 8);
2059+
tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2060+
#ifdef __BIG_ENDIAN
2061+
tmp |= BUF_SWAP_32BIT;
2062+
#endif
2063+
WREG32(CP_RB1_CNTL, tmp);
2064+
2065+
/* Initialize the ring buffer's read and write pointers */
2066+
WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2067+
ring->wptr = 0;
2068+
WREG32(CP_RB1_WPTR, ring->wptr);
2069+
2070+
/* set the wb address wether it's enabled or not */
2071+
WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2072+
WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2073+
2074+
mdelay(1);
2075+
WREG32(CP_RB1_CNTL, tmp);
2076+
2077+
WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2078+
2079+
ring->rptr = RREG32(CP_RB1_RPTR);
2080+
2081+
/* ring2 - compute only */
2082+
/* Set ring buffer size */
2083+
ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2084+
rb_bufsz = drm_order(ring->ring_size / 8);
2085+
tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2086+
#ifdef __BIG_ENDIAN
2087+
tmp |= BUF_SWAP_32BIT;
2088+
#endif
2089+
WREG32(CP_RB2_CNTL, tmp);
2090+
2091+
/* Initialize the ring buffer's read and write pointers */
2092+
WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2093+
ring->wptr = 0;
2094+
WREG32(CP_RB2_WPTR, ring->wptr);
2095+
2096+
/* set the wb address wether it's enabled or not */
2097+
WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2098+
WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2099+
2100+
mdelay(1);
2101+
WREG32(CP_RB2_CNTL, tmp);
2102+
2103+
WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2104+
2105+
ring->rptr = RREG32(CP_RB2_RPTR);
2106+
2107+
/* start the rings */
2108+
si_cp_start(rdev);
2109+
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2110+
rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2111+
rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2112+
r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2113+
if (r) {
2114+
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2115+
rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2116+
rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2117+
return r;
2118+
}
2119+
r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2120+
if (r) {
2121+
rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2122+
}
2123+
r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2124+
if (r) {
2125+
rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2126+
}
2127+
2128+
return 0;
2129+
}
2130+
18642131
bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
18652132
{
18662133
u32 srbm_status;

0 commit comments

Comments
 (0)