 #include "radeon_drm.h"
 #include "sid.h"
 #include "atom.h"
+#include "si_blit_shaders.h"

 #define SI_PFP_UCODE_SIZE 2144
 #define SI_PM4_UCODE_SIZE 2144
@@ -1861,6 +1862,272 @@ static void si_gpu_init(struct radeon_device *rdev)
 	udelay(50);
 }

+/*
+ * CP.
+ */
+static void si_cp_enable(struct radeon_device *rdev, bool enable)
+{
+	if (enable)
+		WREG32(CP_ME_CNTL, 0);
+	else {
+		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
+		WREG32(SCRATCH_UMSK, 0);
+	}
+	udelay(50);
+}
+
+static int si_cp_load_microcode(struct radeon_device *rdev)
+{
+	const __be32 *fw_data;
+	int i;
+
+	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
+		return -EINVAL;
+
+	si_cp_enable(rdev, false);
+
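+	/* each image is streamed one big-endian dword at a time through an
+	 * auto-incrementing data port; writing 0 to the address register
+	 * before and after each loop rewinds the ucode write pointer
+	 */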
+	/* PFP */
+	fw_data = (const __be32 *)rdev->pfp_fw->data;
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
+		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+
+	/* CE */
+	fw_data = (const __be32 *)rdev->ce_fw->data;
+	WREG32(CP_CE_UCODE_ADDR, 0);
+	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
+		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_CE_UCODE_ADDR, 0);
+
+	/* ME */
+	fw_data = (const __be32 *)rdev->me_fw->data;
+	WREG32(CP_ME_RAM_WADDR, 0);
+	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
+		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_ME_RAM_WADDR, 0);
+
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	WREG32(CP_CE_UCODE_ADDR, 0);
+	WREG32(CP_ME_RAM_WADDR, 0);
+	WREG32(CP_ME_RAM_RADDR, 0);
+	return 0;
+}
+
+static int si_cp_start(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	int r, i;
+
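+	/* 7 dwords for the ME_INITIALIZE packet, 4 for the CE SET_BASE packet */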
+	r = radeon_ring_lock(rdev, ring, 7 + 4);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		return r;
+	}
+	/* init the CP */
+	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(ring, 0x1);
+	radeon_ring_write(ring, 0x0);
+	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
+	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+
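+	/* the two dwords after the SET_BASE(CE_PARTITION_BASE) header below
+	 * (0xc000/0xe000) are presumably the CE RAM partition sizes
+	 */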
+	/* init the CE partitions */
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+	radeon_ring_write(ring, 0xc000);
+	radeon_ring_write(ring, 0xe000);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	si_cp_enable(rdev, true);
+
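+	/* 10 extra dwords: two 2-dword PREAMBLE_CNTL packets, a 2-dword
+	 * CLEAR_STATE, and the 4-dword SET_CONTEXT_REG packet below
+	 */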
+	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		return r;
+	}
+
+	/* setup clear context state */
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+	for (i = 0; i < si_default_size; i++)
+		radeon_ring_write(ring, si_default_state[i]);
+
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+	/* set clear context state */
+	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, 0x00000316);
+	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
+
+	radeon_ring_unlock_commit(rdev, ring);
+
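+	/* emit a compute CLEAR_STATE on every CP ring; the loop starts at the
+	 * gfx ring index, so ring 0 gets the compute clear as well
+	 */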
+	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
+		ring = &rdev->ring[i];
+		r = radeon_ring_lock(rdev, ring, 2);
+		if (r) {
+			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+			return r;
+		}
+
+		/* clear the compute context state */
+		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
+		radeon_ring_write(ring, 0);
+
+		radeon_ring_unlock_commit(rdev, ring);
+	}
+
+	return 0;
+}
+
+static void si_cp_fini(struct radeon_device *rdev)
+{
+	si_cp_enable(rdev, false);
+	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+}
+
+static int si_cp_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 tmp;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset the CP; if the CP is reset, then PA, VGT, SPI and SX also need to be reset */
+	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
+				 SOFT_RESET_PA |
+				 SOFT_RESET_VGT |
+				 SOFT_RESET_SPI |
+				 SOFT_RESET_SX));
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+	RREG32(GRBM_SOFT_RESET);
+
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
+	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
+
+	/* Set the write pointer delay */
+	WREG32(CP_RB_WPTR_DELAY, 0);
+
+	WREG32(CP_DEBUG, 0);
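+	/* the scratch writeback base address is programmed in 256-byte units,
+	 * hence the >> 8
+	 */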
+	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
+
+	/* ring 0 - compute and gfx */
+	/* Set ring buffer size */
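+	/* RB_BUFSZ and RB_BLKSZ are log2-encoded; drm_order() returns log2 */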
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
+	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+	tmp |= BUF_SWAP_32BIT;
+#endif
+	WREG32(CP_RB0_CNTL, tmp);
+
+	/* Initialize the ring buffer's read and write pointers */
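+	/* RB_RPTR_WR_ENA lets the driver reset the read pointer during init;
+	 * the final CNTL write below clears it again
+	 */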
+	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
+	ring->wptr = 0;
+	WREG32(CP_RB0_WPTR, ring->wptr);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
+
+	if (rdev->wb.enabled)
+		WREG32(SCRATCH_UMSK, 0xff);
+	else {
+		tmp |= RB_NO_UPDATE;
+		WREG32(SCRATCH_UMSK, 0);
+	}
+
+	mdelay(1);
+	WREG32(CP_RB0_CNTL, tmp);
+
+	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
+
+	ring->rptr = RREG32(CP_RB0_RPTR);
+
+	/* ring1 - compute only */
+	/* Set ring buffer size */
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
+	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+	tmp |= BUF_SWAP_32BIT;
+#endif
+	WREG32(CP_RB1_CNTL, tmp);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
+	ring->wptr = 0;
+	WREG32(CP_RB1_WPTR, ring->wptr);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
+
+	mdelay(1);
+	WREG32(CP_RB1_CNTL, tmp);
+
+	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
+
+	ring->rptr = RREG32(CP_RB1_RPTR);
+
+	/* ring2 - compute only */
+	/* Set ring buffer size */
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
+	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+	tmp |= BUF_SWAP_32BIT;
+#endif
+	WREG32(CP_RB2_CNTL, tmp);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
+	ring->wptr = 0;
+	WREG32(CP_RB2_WPTR, ring->wptr);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
+
+	mdelay(1);
+	WREG32(CP_RB2_CNTL, tmp);
+
+	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
+
+	ring->rptr = RREG32(CP_RB2_RPTR);
+
+	/* start the rings */
+	r = si_cp_start(rdev);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to start (%d).\n", r);
+		return r;
+	}
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
+	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
+	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
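+	/* a gfx ring test failure is fatal; a compute ring failure just
+	 * leaves that ring marked not ready
+	 */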
+	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	if (r) {
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+		return r;
+	}
+	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+	if (r) {
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+	}
+	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+	if (r) {
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+	}
+
+	return 0;
+}
+
 bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 srbm_status;