| .. | .. |
|---|
| 20 | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
|---|
| 21 | 21 | * |
|---|
| 22 | 22 | */ |
|---|
| 23 | + |
|---|
| 23 | 24 | #include <linux/firmware.h> |
|---|
| 24 | | -#include <drm/drmP.h> |
|---|
| 25 | +#include <linux/module.h> |
|---|
| 26 | + |
|---|
| 25 | 27 | #include "amdgpu.h" |
|---|
| 26 | 28 | #include "amdgpu_ih.h" |
|---|
| 27 | 29 | #include "amdgpu_gfx.h" |
|---|
| .. | .. |
|---|
| 882 | 884 | |
|---|
| 883 | 885 | static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); |
|---|
| 884 | 886 | static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); |
|---|
| 885 | | -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); |
|---|
| 886 | 887 | static void gfx_v7_0_init_pg(struct amdgpu_device *adev); |
|---|
| 887 | 888 | static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); |
|---|
| 888 | 889 | |
|---|
| .. | .. |
|---|
| 1849 | 1850 | * |
|---|
| 1850 | 1851 | */ |
|---|
| 1851 | 1852 | #define DEFAULT_SH_MEM_BASES (0x6000) |
|---|
| 1852 | | -#define FIRST_COMPUTE_VMID (8) |
|---|
| 1853 | | -#define LAST_COMPUTE_VMID (16) |
|---|
| 1854 | 1853 | static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) |
|---|
| 1855 | 1854 | { |
|---|
| 1856 | 1855 | int i; |
|---|
| .. | .. |
|---|
| 1868 | 1867 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; |
|---|
| 1869 | 1868 | sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; |
|---|
| 1870 | 1869 | mutex_lock(&adev->srbm_mutex); |
|---|
| 1871 | | - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { |
|---|
| 1870 | + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { |
|---|
| 1872 | 1871 | cik_srbm_select(adev, 0, 0, 0, i); |
|---|
| 1873 | 1872 | /* CP and shaders */ |
|---|
| 1874 | 1873 | WREG32(mmSH_MEM_CONFIG, sh_mem_config); |
|---|
| .. | .. |
|---|
| 1878 | 1877 | } |
|---|
| 1879 | 1878 | cik_srbm_select(adev, 0, 0, 0, 0); |
|---|
| 1880 | 1879 | mutex_unlock(&adev->srbm_mutex); |
|---|
| 1880 | + |
|---|
| 1881 | + /* Initialize all compute VMIDs to have no GDS, GWS, or OA |
|---|
| 1882 | + access. These should be enabled by FW for target VMIDs. */ |
|---|
| 1883 | + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { |
|---|
| 1884 | + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); |
|---|
| 1885 | + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); |
|---|
| 1886 | + WREG32(amdgpu_gds_reg_offset[i].gws, 0); |
|---|
| 1887 | + WREG32(amdgpu_gds_reg_offset[i].oa, 0); |
|---|
| 1888 | + } |
|---|
| 1889 | +} |
|---|
| 1890 | + |
|---|
| 1891 | +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) |
|---|
| 1892 | +{ |
|---|
| 1893 | + int vmid; |
|---|
| 1894 | + |
|---|
| 1895 | + /* |
|---|
| 1896 | + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA |
|---|
| 1897 | + * access. Compute VMIDs should be enabled by FW for target VMIDs, |
|---|
| 1898 | + * the driver can enable them for graphics. VMID0 should maintain |
|---|
| 1899 | + * access so that HWS firmware can save/restore entries. |
|---|
| 1900 | + */ |
|---|
| 1901 | + for (vmid = 1; vmid < 16; vmid++) { |
|---|
| 1902 | + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); |
|---|
| 1903 | + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); |
|---|
| 1904 | + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); |
|---|
| 1905 | + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); |
|---|
| 1906 | + } |
|---|
| 1881 | 1907 | } |
|---|
| 1882 | 1908 | |
|---|
| 1883 | 1909 | static void gfx_v7_0_config_init(struct amdgpu_device *adev) |
|---|
| .. | .. |
|---|
| 1886 | 1912 | } |
|---|
| 1887 | 1913 | |
|---|
| 1888 | 1914 | /** |
|---|
| 1889 | | - * gfx_v7_0_gpu_init - setup the 3D engine |
|---|
| 1915 | + * gfx_v7_0_constants_init - setup the 3D engine |
|---|
| 1890 | 1916 | * |
|---|
| 1891 | 1917 | * @adev: amdgpu_device pointer |
|---|
| 1892 | 1918 | * |
|---|
| 1893 | | - * Configures the 3D engine and tiling configuration |
|---|
| 1894 | | - * registers so that the 3D engine is usable. |
|---|
| 1919 | + * init the gfx constants such as the 3D engine, tiling configuration |
|---|
| 1920 | + * registers, maximum number of quad pipes, render backends... |
|---|
| 1895 | 1921 | */ |
|---|
| 1896 | | -static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) |
|---|
| 1922 | +static void gfx_v7_0_constants_init(struct amdgpu_device *adev) |
|---|
| 1897 | 1923 | { |
|---|
| 1898 | 1924 | u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; |
|---|
| 1899 | 1925 | u32 tmp; |
|---|
| .. | .. |
|---|
| 1958 | 1984 | mutex_unlock(&adev->srbm_mutex); |
|---|
| 1959 | 1985 | |
|---|
| 1960 | 1986 | gfx_v7_0_init_compute_vmid(adev); |
|---|
| 1987 | + gfx_v7_0_init_gds_vmid(adev); |
|---|
| 1961 | 1988 | |
|---|
| 1962 | 1989 | WREG32(mmSX_DEBUG_1, 0x20); |
|---|
| 1963 | 1990 | |
|---|
| .. | .. |
|---|
| 2064 | 2091 | int r; |
|---|
| 2065 | 2092 | |
|---|
| 2066 | 2093 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
|---|
| 2067 | | - if (r) { |
|---|
| 2068 | | - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); |
|---|
| 2094 | + if (r) |
|---|
| 2069 | 2095 | return r; |
|---|
| 2070 | | - } |
|---|
| 2096 | + |
|---|
| 2071 | 2097 | WREG32(scratch, 0xCAFEDEAD); |
|---|
| 2072 | 2098 | r = amdgpu_ring_alloc(ring, 3); |
|---|
| 2073 | | - if (r) { |
|---|
| 2074 | | - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); |
|---|
| 2075 | | - amdgpu_gfx_scratch_free(adev, scratch); |
|---|
| 2076 | | - return r; |
|---|
| 2077 | | - } |
|---|
| 2099 | + if (r) |
|---|
| 2100 | + goto error_free_scratch; |
|---|
| 2101 | + |
|---|
| 2078 | 2102 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
|---|
| 2079 | 2103 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
|---|
| 2080 | 2104 | amdgpu_ring_write(ring, 0xDEADBEEF); |
|---|
| .. | .. |
|---|
| 2084 | 2108 | tmp = RREG32(scratch); |
|---|
| 2085 | 2109 | if (tmp == 0xDEADBEEF) |
|---|
| 2086 | 2110 | break; |
|---|
| 2087 | | - DRM_UDELAY(1); |
|---|
| 2111 | + udelay(1); |
|---|
| 2088 | 2112 | } |
|---|
| 2089 | | - if (i < adev->usec_timeout) { |
|---|
| 2090 | | - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); |
|---|
| 2091 | | - } else { |
|---|
| 2092 | | - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", |
|---|
| 2093 | | - ring->idx, scratch, tmp); |
|---|
| 2094 | | - r = -EINVAL; |
|---|
| 2095 | | - } |
|---|
| 2113 | + if (i >= adev->usec_timeout) |
|---|
| 2114 | + r = -ETIMEDOUT; |
|---|
| 2115 | + |
|---|
| 2116 | +error_free_scratch: |
|---|
| 2096 | 2117 | amdgpu_gfx_scratch_free(adev, scratch); |
|---|
| 2097 | 2118 | return r; |
|---|
| 2098 | 2119 | } |
|---|
| .. | .. |
|---|
| 2233 | 2254 | * on the gfx ring for execution by the GPU. |
|---|
| 2234 | 2255 | */ |
|---|
| 2235 | 2256 | static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
|---|
| 2236 | | - struct amdgpu_ib *ib, |
|---|
| 2237 | | - unsigned vmid, bool ctx_switch) |
|---|
| 2257 | + struct amdgpu_job *job, |
|---|
| 2258 | + struct amdgpu_ib *ib, |
|---|
| 2259 | + uint32_t flags) |
|---|
| 2238 | 2260 | { |
|---|
| 2261 | + unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
|---|
| 2239 | 2262 | u32 header, control = 0; |
|---|
| 2240 | 2263 | |
|---|
| 2241 | 2264 | /* insert SWITCH_BUFFER packet before first IB in the ring frame */ |
|---|
| 2242 | | - if (ctx_switch) { |
|---|
| 2265 | + if (flags & AMDGPU_HAVE_CTX_SWITCH) { |
|---|
| 2243 | 2266 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); |
|---|
| 2244 | 2267 | amdgpu_ring_write(ring, 0); |
|---|
| 2245 | 2268 | } |
|---|
| .. | .. |
|---|
| 2262 | 2285 | } |
|---|
| 2263 | 2286 | |
|---|
| 2264 | 2287 | static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
|---|
| 2288 | + struct amdgpu_job *job, |
|---|
| 2265 | 2289 | struct amdgpu_ib *ib, |
|---|
| 2266 | | - unsigned vmid, bool ctx_switch) |
|---|
| 2290 | + uint32_t flags) |
|---|
| 2267 | 2291 | { |
|---|
| 2292 | + unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
|---|
| 2268 | 2293 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); |
|---|
| 2294 | + |
|---|
| 2295 | + /* Currently, there is a high possibility to get wave ID mismatch |
|---|
| 2296 | + * between ME and GDS, leading to a hw deadlock, because ME generates |
|---|
| 2297 | + * different wave IDs than the GDS expects. This situation happens |
|---|
| 2298 | + * randomly when at least 5 compute pipes use GDS ordered append. |
|---|
| 2299 | + * The wave IDs generated by ME are also wrong after suspend/resume. |
|---|
| 2300 | + * Those are probably bugs somewhere else in the kernel driver. |
|---|
| 2301 | + * |
|---|
| 2302 | + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and |
|---|
| 2303 | + * GDS to 0 for this ring (me/pipe). |
|---|
| 2304 | + */ |
|---|
| 2305 | + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { |
|---|
| 2306 | + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); |
|---|
| 2307 | + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); |
|---|
| 2308 | + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); |
|---|
| 2309 | + } |
|---|
| 2269 | 2310 | |
|---|
| 2270 | 2311 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); |
|---|
| 2271 | 2312 | amdgpu_ring_write(ring, |
|---|
| .. | .. |
|---|
| 2316 | 2357 | long r; |
|---|
| 2317 | 2358 | |
|---|
| 2318 | 2359 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
|---|
| 2319 | | - if (r) { |
|---|
| 2320 | | - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); |
|---|
| 2360 | + if (r) |
|---|
| 2321 | 2361 | return r; |
|---|
| 2322 | | - } |
|---|
| 2362 | + |
|---|
| 2323 | 2363 | WREG32(scratch, 0xCAFEDEAD); |
|---|
| 2324 | 2364 | memset(&ib, 0, sizeof(ib)); |
|---|
| 2325 | | - r = amdgpu_ib_get(adev, NULL, 256, &ib); |
|---|
| 2326 | | - if (r) { |
|---|
| 2327 | | - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); |
|---|
| 2365 | + r = amdgpu_ib_get(adev, NULL, 256, |
|---|
| 2366 | + AMDGPU_IB_POOL_DIRECT, &ib); |
|---|
| 2367 | + if (r) |
|---|
| 2328 | 2368 | goto err1; |
|---|
| 2329 | | - } |
|---|
| 2369 | + |
|---|
| 2330 | 2370 | ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); |
|---|
| 2331 | 2371 | ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); |
|---|
| 2332 | 2372 | ib.ptr[2] = 0xDEADBEEF; |
|---|
| .. | .. |
|---|
| 2338 | 2378 | |
|---|
| 2339 | 2379 | r = dma_fence_wait_timeout(f, false, timeout); |
|---|
| 2340 | 2380 | if (r == 0) { |
|---|
| 2341 | | - DRM_ERROR("amdgpu: IB test timed out\n"); |
|---|
| 2342 | 2381 | r = -ETIMEDOUT; |
|---|
| 2343 | 2382 | goto err2; |
|---|
| 2344 | 2383 | } else if (r < 0) { |
|---|
| 2345 | | - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); |
|---|
| 2346 | 2384 | goto err2; |
|---|
| 2347 | 2385 | } |
|---|
| 2348 | 2386 | tmp = RREG32(scratch); |
|---|
| 2349 | | - if (tmp == 0xDEADBEEF) { |
|---|
| 2350 | | - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); |
|---|
| 2387 | + if (tmp == 0xDEADBEEF) |
|---|
| 2351 | 2388 | r = 0; |
|---|
| 2352 | | - } else { |
|---|
| 2353 | | - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", |
|---|
| 2354 | | - scratch, tmp); |
|---|
| 2389 | + else |
|---|
| 2355 | 2390 | r = -EINVAL; |
|---|
| 2356 | | - } |
|---|
| 2357 | 2391 | |
|---|
| 2358 | 2392 | err2: |
|---|
| 2359 | 2393 | amdgpu_ib_free(adev, &ib, NULL); |
|---|
| .. | .. |
|---|
| 2396 | 2430 | */ |
|---|
| 2397 | 2431 | static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) |
|---|
| 2398 | 2432 | { |
|---|
| 2399 | | - int i; |
|---|
| 2400 | | - |
|---|
| 2401 | | - if (enable) { |
|---|
| 2433 | + if (enable) |
|---|
| 2402 | 2434 | WREG32(mmCP_ME_CNTL, 0); |
|---|
| 2403 | | - } else { |
|---|
| 2404 | | - WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); |
|---|
| 2405 | | - for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
|---|
| 2406 | | - adev->gfx.gfx_ring[i].ready = false; |
|---|
| 2407 | | - } |
|---|
| 2435 | + else |
|---|
| 2436 | + WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | |
|---|
| 2437 | + CP_ME_CNTL__PFP_HALT_MASK | |
|---|
| 2438 | + CP_ME_CNTL__CE_HALT_MASK)); |
|---|
| 2408 | 2439 | udelay(50); |
|---|
| 2409 | 2440 | } |
|---|
| 2410 | 2441 | |
|---|
| .. | .. |
|---|
| 2613 | 2644 | |
|---|
| 2614 | 2645 | /* start the ring */ |
|---|
| 2615 | 2646 | gfx_v7_0_cp_gfx_start(adev); |
|---|
| 2616 | | - ring->ready = true; |
|---|
| 2617 | | - r = amdgpu_ring_test_ring(ring); |
|---|
| 2618 | | - if (r) { |
|---|
| 2619 | | - ring->ready = false; |
|---|
| 2647 | + r = amdgpu_ring_test_helper(ring); |
|---|
| 2648 | + if (r) |
|---|
| 2620 | 2649 | return r; |
|---|
| 2621 | | - } |
|---|
| 2622 | 2650 | |
|---|
| 2623 | 2651 | return 0; |
|---|
| 2624 | 2652 | } |
|---|
| .. | .. |
|---|
| 2668 | 2696 | */ |
|---|
| 2669 | 2697 | static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) |
|---|
| 2670 | 2698 | { |
|---|
| 2671 | | - int i; |
|---|
| 2672 | | - |
|---|
| 2673 | | - if (enable) { |
|---|
| 2699 | + if (enable) |
|---|
| 2674 | 2700 | WREG32(mmCP_MEC_CNTL, 0); |
|---|
| 2675 | | - } else { |
|---|
| 2676 | | - WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
|---|
| 2677 | | - for (i = 0; i < adev->gfx.num_compute_rings; i++) |
|---|
| 2678 | | - adev->gfx.compute_ring[i].ready = false; |
|---|
| 2679 | | - } |
|---|
| 2701 | + else |
|---|
| 2702 | + WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | |
|---|
| 2703 | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
|---|
| 2680 | 2704 | udelay(50); |
|---|
| 2681 | 2705 | } |
|---|
| 2682 | 2706 | |
|---|
| .. | .. |
|---|
| 2781 | 2805 | * GFX7_MEC_HPD_SIZE * 2; |
|---|
| 2782 | 2806 | |
|---|
| 2783 | 2807 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, |
|---|
| 2784 | | - AMDGPU_GEM_DOMAIN_GTT, |
|---|
| 2808 | + AMDGPU_GEM_DOMAIN_VRAM, |
|---|
| 2785 | 2809 | &adev->gfx.mec.hpd_eop_obj, |
|---|
| 2786 | 2810 | &adev->gfx.mec.hpd_eop_gpu_addr, |
|---|
| 2787 | 2811 | (void **)&hpd); |
|---|
| .. | .. |
|---|
| 3013 | 3037 | mqd->cp_hqd_active = 1; |
|---|
| 3014 | 3038 | } |
|---|
| 3015 | 3039 | |
|---|
| 3016 | | -int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) |
|---|
| 3040 | +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) |
|---|
| 3017 | 3041 | { |
|---|
| 3018 | 3042 | uint32_t tmp; |
|---|
| 3019 | 3043 | uint32_t mqd_reg; |
|---|
| .. | .. |
|---|
| 3106 | 3130 | |
|---|
| 3107 | 3131 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
|---|
| 3108 | 3132 | ring = &adev->gfx.compute_ring[i]; |
|---|
| 3109 | | - ring->ready = true; |
|---|
| 3110 | | - r = amdgpu_ring_test_ring(ring); |
|---|
| 3111 | | - if (r) |
|---|
| 3112 | | - ring->ready = false; |
|---|
| 3133 | + amdgpu_ring_test_helper(ring); |
|---|
| 3113 | 3134 | } |
|---|
| 3114 | 3135 | |
|---|
| 3115 | 3136 | return 0; |
|---|
| .. | .. |
|---|
| 3268 | 3289 | * The RLC is a multi-purpose microengine that handles a |
|---|
| 3269 | 3290 | * variety of functions. |
|---|
| 3270 | 3291 | */ |
|---|
| 3271 | | -static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) |
|---|
| 3272 | | -{ |
|---|
| 3273 | | - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); |
|---|
| 3274 | | - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); |
|---|
| 3275 | | - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); |
|---|
| 3276 | | -} |
|---|
| 3277 | | - |
|---|
| 3278 | 3292 | static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) |
|---|
| 3279 | 3293 | { |
|---|
| 3280 | 3294 | const u32 *src_ptr; |
|---|
| 3281 | | - volatile u32 *dst_ptr; |
|---|
| 3282 | | - u32 dws, i; |
|---|
| 3295 | + u32 dws; |
|---|
| 3283 | 3296 | const struct cs_section_def *cs_data; |
|---|
| 3284 | 3297 | int r; |
|---|
| 3285 | 3298 | |
|---|
| .. | .. |
|---|
| 3306 | 3319 | cs_data = adev->gfx.rlc.cs_data; |
|---|
| 3307 | 3320 | |
|---|
| 3308 | 3321 | if (src_ptr) { |
|---|
| 3309 | | - /* save restore block */ |
|---|
| 3310 | | - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, |
|---|
| 3311 | | - AMDGPU_GEM_DOMAIN_VRAM, |
|---|
| 3312 | | - &adev->gfx.rlc.save_restore_obj, |
|---|
| 3313 | | - &adev->gfx.rlc.save_restore_gpu_addr, |
|---|
| 3314 | | - (void **)&adev->gfx.rlc.sr_ptr); |
|---|
| 3315 | | - if (r) { |
|---|
| 3316 | | - dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); |
|---|
| 3317 | | - gfx_v7_0_rlc_fini(adev); |
|---|
| 3322 | + /* init save restore block */ |
|---|
| 3323 | + r = amdgpu_gfx_rlc_init_sr(adev, dws); |
|---|
| 3324 | + if (r) |
|---|
| 3318 | 3325 | return r; |
|---|
| 3319 | | - } |
|---|
| 3320 | | - |
|---|
| 3321 | | - /* write the sr buffer */ |
|---|
| 3322 | | - dst_ptr = adev->gfx.rlc.sr_ptr; |
|---|
| 3323 | | - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) |
|---|
| 3324 | | - dst_ptr[i] = cpu_to_le32(src_ptr[i]); |
|---|
| 3325 | | - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); |
|---|
| 3326 | | - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); |
|---|
| 3327 | 3326 | } |
|---|
| 3328 | 3327 | |
|---|
| 3329 | 3328 | if (cs_data) { |
|---|
| 3330 | | - /* clear state block */ |
|---|
| 3331 | | - adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); |
|---|
| 3332 | | - |
|---|
| 3333 | | - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, |
|---|
| 3334 | | - AMDGPU_GEM_DOMAIN_VRAM, |
|---|
| 3335 | | - &adev->gfx.rlc.clear_state_obj, |
|---|
| 3336 | | - &adev->gfx.rlc.clear_state_gpu_addr, |
|---|
| 3337 | | - (void **)&adev->gfx.rlc.cs_ptr); |
|---|
| 3338 | | - if (r) { |
|---|
| 3339 | | - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); |
|---|
| 3340 | | - gfx_v7_0_rlc_fini(adev); |
|---|
| 3329 | + /* init clear state block */ |
|---|
| 3330 | + r = amdgpu_gfx_rlc_init_csb(adev); |
|---|
| 3331 | + if (r) |
|---|
| 3341 | 3332 | return r; |
|---|
| 3342 | | - } |
|---|
| 3343 | | - |
|---|
| 3344 | | - /* set up the cs buffer */ |
|---|
| 3345 | | - dst_ptr = adev->gfx.rlc.cs_ptr; |
|---|
| 3346 | | - gfx_v7_0_get_csb_buffer(adev, dst_ptr); |
|---|
| 3347 | | - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); |
|---|
| 3348 | | - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); |
|---|
| 3349 | 3333 | } |
|---|
| 3350 | 3334 | |
|---|
| 3351 | 3335 | if (adev->gfx.rlc.cp_table_size) { |
|---|
| 3352 | | - |
|---|
| 3353 | | - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, |
|---|
| 3354 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, |
|---|
| 3355 | | - &adev->gfx.rlc.cp_table_obj, |
|---|
| 3356 | | - &adev->gfx.rlc.cp_table_gpu_addr, |
|---|
| 3357 | | - (void **)&adev->gfx.rlc.cp_table_ptr); |
|---|
| 3358 | | - if (r) { |
|---|
| 3359 | | - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); |
|---|
| 3360 | | - gfx_v7_0_rlc_fini(adev); |
|---|
| 3336 | + r = amdgpu_gfx_rlc_init_cpt(adev); |
|---|
| 3337 | + if (r) |
|---|
| 3361 | 3338 | return r; |
|---|
| 3362 | | - } |
|---|
| 3363 | | - |
|---|
| 3364 | | - gfx_v7_0_init_cp_pg_table(adev); |
|---|
| 3365 | | - |
|---|
| 3366 | | - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); |
|---|
| 3367 | | - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); |
|---|
| 3368 | | - |
|---|
| 3369 | 3339 | } |
|---|
| 3340 | + |
|---|
| 3341 | + /* init spm vmid with 0xf */ |
|---|
| 3342 | + if (adev->gfx.rlc.funcs->update_spm_vmid) |
|---|
| 3343 | + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); |
|---|
| 3370 | 3344 | |
|---|
| 3371 | 3345 | return 0; |
|---|
| 3372 | 3346 | } |
|---|
| .. | .. |
|---|
| 3446 | 3420 | return orig; |
|---|
| 3447 | 3421 | } |
|---|
| 3448 | 3422 | |
|---|
| 3449 | | -static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) |
|---|
| 3423 | +static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) |
|---|
| 3424 | +{ |
|---|
| 3425 | + return true; |
|---|
| 3426 | +} |
|---|
| 3427 | + |
|---|
| 3428 | +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) |
|---|
| 3450 | 3429 | { |
|---|
| 3451 | 3430 | u32 tmp, i, mask; |
|---|
| 3452 | 3431 | |
|---|
| .. | .. |
|---|
| 3468 | 3447 | } |
|---|
| 3469 | 3448 | } |
|---|
| 3470 | 3449 | |
|---|
| 3471 | | -static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) |
|---|
| 3450 | +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) |
|---|
| 3472 | 3451 | { |
|---|
| 3473 | 3452 | u32 tmp; |
|---|
| 3474 | 3453 | |
|---|
| .. | .. |
|---|
| 3545 | 3524 | adev->gfx.rlc_feature_version = le32_to_cpu( |
|---|
| 3546 | 3525 | hdr->ucode_feature_version); |
|---|
| 3547 | 3526 | |
|---|
| 3548 | | - gfx_v7_0_rlc_stop(adev); |
|---|
| 3527 | + adev->gfx.rlc.funcs->stop(adev); |
|---|
| 3549 | 3528 | |
|---|
| 3550 | 3529 | /* disable CG */ |
|---|
| 3551 | 3530 | tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; |
|---|
| 3552 | 3531 | WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); |
|---|
| 3553 | 3532 | |
|---|
| 3554 | | - gfx_v7_0_rlc_reset(adev); |
|---|
| 3533 | + adev->gfx.rlc.funcs->reset(adev); |
|---|
| 3555 | 3534 | |
|---|
| 3556 | 3535 | gfx_v7_0_init_pg(adev); |
|---|
| 3557 | 3536 | |
|---|
| .. | .. |
|---|
| 3582 | 3561 | if (adev->asic_type == CHIP_BONAIRE) |
|---|
| 3583 | 3562 | WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); |
|---|
| 3584 | 3563 | |
|---|
| 3585 | | - gfx_v7_0_rlc_start(adev); |
|---|
| 3564 | + adev->gfx.rlc.funcs->start(adev); |
|---|
| 3586 | 3565 | |
|---|
| 3587 | 3566 | return 0; |
|---|
| 3567 | +} |
|---|
| 3568 | + |
|---|
| 3569 | +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) |
|---|
| 3570 | +{ |
|---|
| 3571 | + u32 data; |
|---|
| 3572 | + |
|---|
| 3573 | + data = RREG32(mmRLC_SPM_VMID); |
|---|
| 3574 | + |
|---|
| 3575 | + data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; |
|---|
| 3576 | + data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; |
|---|
| 3577 | + |
|---|
| 3578 | + WREG32(mmRLC_SPM_VMID, data); |
|---|
| 3588 | 3579 | } |
|---|
| 3589 | 3580 | |
|---|
| 3590 | 3581 | static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) |
|---|
| .. | .. |
|---|
| 3784 | 3775 | WREG32(mmRLC_PG_CNTL, data); |
|---|
| 3785 | 3776 | } |
|---|
| 3786 | 3777 | |
|---|
| 3787 | | -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) |
|---|
| 3778 | +static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev) |
|---|
| 3788 | 3779 | { |
|---|
| 3789 | | - const __le32 *fw_data; |
|---|
| 3790 | | - volatile u32 *dst_ptr; |
|---|
| 3791 | | - int me, i, max_me = 4; |
|---|
| 3792 | | - u32 bo_offset = 0; |
|---|
| 3793 | | - u32 table_offset, table_size; |
|---|
| 3794 | | - |
|---|
| 3795 | 3780 | if (adev->asic_type == CHIP_KAVERI) |
|---|
| 3796 | | - max_me = 5; |
|---|
| 3797 | | - |
|---|
| 3798 | | - if (adev->gfx.rlc.cp_table_ptr == NULL) |
|---|
| 3799 | | - return; |
|---|
| 3800 | | - |
|---|
| 3801 | | - /* write the cp table buffer */ |
|---|
| 3802 | | - dst_ptr = adev->gfx.rlc.cp_table_ptr; |
|---|
| 3803 | | - for (me = 0; me < max_me; me++) { |
|---|
| 3804 | | - if (me == 0) { |
|---|
| 3805 | | - const struct gfx_firmware_header_v1_0 *hdr = |
|---|
| 3806 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; |
|---|
| 3807 | | - fw_data = (const __le32 *) |
|---|
| 3808 | | - (adev->gfx.ce_fw->data + |
|---|
| 3809 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
|---|
| 3810 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
|---|
| 3811 | | - table_size = le32_to_cpu(hdr->jt_size); |
|---|
| 3812 | | - } else if (me == 1) { |
|---|
| 3813 | | - const struct gfx_firmware_header_v1_0 *hdr = |
|---|
| 3814 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; |
|---|
| 3815 | | - fw_data = (const __le32 *) |
|---|
| 3816 | | - (adev->gfx.pfp_fw->data + |
|---|
| 3817 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
|---|
| 3818 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
|---|
| 3819 | | - table_size = le32_to_cpu(hdr->jt_size); |
|---|
| 3820 | | - } else if (me == 2) { |
|---|
| 3821 | | - const struct gfx_firmware_header_v1_0 *hdr = |
|---|
| 3822 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; |
|---|
| 3823 | | - fw_data = (const __le32 *) |
|---|
| 3824 | | - (adev->gfx.me_fw->data + |
|---|
| 3825 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
|---|
| 3826 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
|---|
| 3827 | | - table_size = le32_to_cpu(hdr->jt_size); |
|---|
| 3828 | | - } else if (me == 3) { |
|---|
| 3829 | | - const struct gfx_firmware_header_v1_0 *hdr = |
|---|
| 3830 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; |
|---|
| 3831 | | - fw_data = (const __le32 *) |
|---|
| 3832 | | - (adev->gfx.mec_fw->data + |
|---|
| 3833 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
|---|
| 3834 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
|---|
| 3835 | | - table_size = le32_to_cpu(hdr->jt_size); |
|---|
| 3836 | | - } else { |
|---|
| 3837 | | - const struct gfx_firmware_header_v1_0 *hdr = |
|---|
| 3838 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; |
|---|
| 3839 | | - fw_data = (const __le32 *) |
|---|
| 3840 | | - (adev->gfx.mec2_fw->data + |
|---|
| 3841 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
|---|
| 3842 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
|---|
| 3843 | | - table_size = le32_to_cpu(hdr->jt_size); |
|---|
| 3844 | | - } |
|---|
| 3845 | | - |
|---|
| 3846 | | - for (i = 0; i < table_size; i ++) { |
|---|
| 3847 | | - dst_ptr[bo_offset + i] = |
|---|
| 3848 | | - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); |
|---|
| 3849 | | - } |
|---|
| 3850 | | - |
|---|
| 3851 | | - bo_offset += table_size; |
|---|
| 3852 | | - } |
|---|
| 3781 | + return 5; |
|---|
| 3782 | + else |
|---|
| 3783 | + return 4; |
|---|
| 3853 | 3784 | } |
|---|
| 3854 | 3785 | |
|---|
| 3855 | 3786 | static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, |
|---|
| .. | .. |
|---|
| 4170 | 4101 | uint32_t gws_base, uint32_t gws_size, |
|---|
| 4171 | 4102 | uint32_t oa_base, uint32_t oa_size) |
|---|
| 4172 | 4103 | { |
|---|
| 4173 | | - gds_base = gds_base >> AMDGPU_GDS_SHIFT; |
|---|
| 4174 | | - gds_size = gds_size >> AMDGPU_GDS_SHIFT; |
|---|
| 4175 | | - |
|---|
| 4176 | | - gws_base = gws_base >> AMDGPU_GWS_SHIFT; |
|---|
| 4177 | | - gws_size = gws_size >> AMDGPU_GWS_SHIFT; |
|---|
| 4178 | | - |
|---|
| 4179 | | - oa_base = oa_base >> AMDGPU_OA_SHIFT; |
|---|
| 4180 | | - oa_size = oa_size >> AMDGPU_OA_SHIFT; |
|---|
| 4181 | | - |
|---|
| 4182 | 4104 | /* GDS Base */ |
|---|
| 4183 | 4105 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
|---|
| 4184 | 4106 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | |
|---|
| .. | .. |
|---|
| 4210 | 4132 | amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); |
|---|
| 4211 | 4133 | amdgpu_ring_write(ring, 0); |
|---|
| 4212 | 4134 | amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); |
|---|
| 4135 | +} |
|---|
| 4136 | + |
|---|
| 4137 | +static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) |
|---|
| 4138 | +{ |
|---|
| 4139 | + struct amdgpu_device *adev = ring->adev; |
|---|
| 4140 | + uint32_t value = 0; |
|---|
| 4141 | + |
|---|
| 4142 | + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); |
|---|
| 4143 | + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); |
|---|
| 4144 | + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); |
|---|
| 4145 | + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); |
|---|
| 4146 | + WREG32(mmSQ_CMD, value); |
|---|
| 4213 | 4147 | } |
|---|
| 4214 | 4148 | |
|---|
| 4215 | 4149 | static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) |
|---|
| .. | .. |
|---|
| 4271 | 4205 | } |
|---|
| 4272 | 4206 | |
|---|
| 4273 | 4207 | static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, |
|---|
| 4274 | | - u32 me, u32 pipe, u32 q) |
|---|
| 4208 | + u32 me, u32 pipe, u32 q, u32 vm) |
|---|
| 4275 | 4209 | { |
|---|
| 4276 | | - cik_srbm_select(adev, me, pipe, q, 0); |
|---|
| 4210 | + cik_srbm_select(adev, me, pipe, q, vm); |
|---|
| 4277 | 4211 | } |
|---|
| 4278 | 4212 | |
|---|
| 4279 | 4213 | static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { |
|---|
| .. | .. |
|---|
| 4285 | 4219 | }; |
|---|
| 4286 | 4220 | |
|---|
| 4287 | 4221 | static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { |
|---|
| 4288 | | - .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, |
|---|
| 4289 | | - .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode |
|---|
| 4222 | + .is_rlc_enabled = gfx_v7_0_is_rlc_enabled, |
|---|
| 4223 | + .set_safe_mode = gfx_v7_0_set_safe_mode, |
|---|
| 4224 | + .unset_safe_mode = gfx_v7_0_unset_safe_mode, |
|---|
| 4225 | + .init = gfx_v7_0_rlc_init, |
|---|
| 4226 | + .get_csb_size = gfx_v7_0_get_csb_size, |
|---|
| 4227 | + .get_csb_buffer = gfx_v7_0_get_csb_buffer, |
|---|
| 4228 | + .get_cp_table_num = gfx_v7_0_cp_pg_table_num, |
|---|
| 4229 | + .resume = gfx_v7_0_rlc_resume, |
|---|
| 4230 | + .stop = gfx_v7_0_rlc_stop, |
|---|
| 4231 | + .reset = gfx_v7_0_rlc_reset, |
|---|
| 4232 | + .start = gfx_v7_0_rlc_start, |
|---|
| 4233 | + .update_spm_vmid = gfx_v7_0_update_spm_vmid |
|---|
| 4290 | 4234 | }; |
|---|
| 4291 | 4235 | |
|---|
| 4292 | 4236 | static int gfx_v7_0_early_init(void *handle) |
|---|
| .. | .. |
|---|
| 4323 | 4267 | static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) |
|---|
| 4324 | 4268 | { |
|---|
| 4325 | 4269 | u32 gb_addr_config; |
|---|
| 4326 | | - u32 mc_shared_chmap, mc_arb_ramcfg; |
|---|
| 4270 | + u32 mc_arb_ramcfg; |
|---|
| 4327 | 4271 | u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; |
|---|
| 4328 | 4272 | u32 tmp; |
|---|
| 4329 | 4273 | |
|---|
| .. | .. |
|---|
| 4400 | 4344 | break; |
|---|
| 4401 | 4345 | } |
|---|
| 4402 | 4346 | |
|---|
| 4403 | | - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); |
|---|
| 4404 | 4347 | adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); |
|---|
| 4405 | 4348 | mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; |
|---|
| 4349 | + |
|---|
| 4350 | + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, |
|---|
| 4351 | + MC_ARB_RAMCFG, NOOFBANK); |
|---|
| 4352 | + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, |
|---|
| 4353 | + MC_ARB_RAMCFG, NOOFRANKS); |
|---|
| 4406 | 4354 | |
|---|
| 4407 | 4355 | adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; |
|---|
| 4408 | 4356 | adev->gfx.config.mem_max_burst_length_bytes = 256; |
|---|
| .. | .. |
|---|
| 4474 | 4422 | |
|---|
| 4475 | 4423 | ring->ring_obj = NULL; |
|---|
| 4476 | 4424 | ring->use_doorbell = true; |
|---|
| 4477 | | - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; |
|---|
| 4425 | + ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; |
|---|
| 4478 | 4426 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); |
|---|
| 4479 | 4427 | |
|---|
| 4480 | 4428 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP |
|---|
| .. | .. |
|---|
| 4483 | 4431 | |
|---|
| 4484 | 4432 | /* type-2 packets are deprecated on MEC, use type-3 instead */ |
|---|
| 4485 | 4433 | r = amdgpu_ring_init(adev, ring, 1024, |
|---|
| 4486 | | - &adev->gfx.eop_irq, irq_type); |
|---|
| 4434 | + &adev->gfx.eop_irq, irq_type, |
|---|
| 4435 | + AMDGPU_RING_PRIO_DEFAULT); |
|---|
| 4487 | 4436 | if (r) |
|---|
| 4488 | 4437 | return r; |
|---|
| 4489 | 4438 | |
|---|
| .. | .. |
|---|
| 4513 | 4462 | adev->gfx.mec.num_queue_per_pipe = 8; |
|---|
| 4514 | 4463 | |
|---|
| 4515 | 4464 | /* EOP Event */ |
|---|
| 4516 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); |
|---|
| 4465 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); |
|---|
| 4517 | 4466 | if (r) |
|---|
| 4518 | 4467 | return r; |
|---|
| 4519 | 4468 | |
|---|
| 4520 | 4469 | /* Privileged reg */ |
|---|
| 4521 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, |
|---|
| 4470 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, |
|---|
| 4522 | 4471 | &adev->gfx.priv_reg_irq); |
|---|
| 4523 | 4472 | if (r) |
|---|
| 4524 | 4473 | return r; |
|---|
| 4525 | 4474 | |
|---|
| 4526 | 4475 | /* Privileged inst */ |
|---|
| 4527 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, |
|---|
| 4476 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, |
|---|
| 4528 | 4477 | &adev->gfx.priv_inst_irq); |
|---|
| 4529 | 4478 | if (r) |
|---|
| 4530 | 4479 | return r; |
|---|
| .. | .. |
|---|
| 4537 | 4486 | return r; |
|---|
| 4538 | 4487 | } |
|---|
| 4539 | 4488 | |
|---|
| 4540 | | - r = gfx_v7_0_rlc_init(adev); |
|---|
| 4489 | + r = adev->gfx.rlc.funcs->init(adev); |
|---|
| 4541 | 4490 | if (r) { |
|---|
| 4542 | 4491 | DRM_ERROR("Failed to init rlc BOs!\n"); |
|---|
| 4543 | 4492 | return r; |
|---|
| .. | .. |
|---|
| 4555 | 4504 | ring->ring_obj = NULL; |
|---|
| 4556 | 4505 | sprintf(ring->name, "gfx"); |
|---|
| 4557 | 4506 | r = amdgpu_ring_init(adev, ring, 1024, |
|---|
| 4558 | | - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); |
|---|
| 4507 | + &adev->gfx.eop_irq, |
|---|
| 4508 | + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, |
|---|
| 4509 | + AMDGPU_RING_PRIO_DEFAULT); |
|---|
| 4559 | 4510 | if (r) |
|---|
| 4560 | 4511 | return r; |
|---|
| 4561 | 4512 | } |
|---|
| .. | .. |
|---|
| 4579 | 4530 | } |
|---|
| 4580 | 4531 | } |
|---|
| 4581 | 4532 | |
|---|
| 4582 | | - /* reserve GDS, GWS and OA resource for gfx */ |
|---|
| 4583 | | - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, |
|---|
| 4584 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, |
|---|
| 4585 | | - &adev->gds.gds_gfx_bo, NULL, NULL); |
|---|
| 4586 | | - if (r) |
|---|
| 4587 | | - return r; |
|---|
| 4588 | | - |
|---|
| 4589 | | - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, |
|---|
| 4590 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, |
|---|
| 4591 | | - &adev->gds.gws_gfx_bo, NULL, NULL); |
|---|
| 4592 | | - if (r) |
|---|
| 4593 | | - return r; |
|---|
| 4594 | | - |
|---|
| 4595 | | - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, |
|---|
| 4596 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, |
|---|
| 4597 | | - &adev->gds.oa_gfx_bo, NULL, NULL); |
|---|
| 4598 | | - if (r) |
|---|
| 4599 | | - return r; |
|---|
| 4600 | | - |
|---|
| 4601 | 4533 | adev->gfx.ce_ram_size = 0x8000; |
|---|
| 4602 | 4534 | |
|---|
| 4603 | 4535 | gfx_v7_0_gpu_early_init(adev); |
|---|
| .. | .. |
|---|
| 4607 | 4539 | |
|---|
| 4608 | 4540 | static int gfx_v7_0_sw_fini(void *handle) |
|---|
| 4609 | 4541 | { |
|---|
| 4610 | | - int i; |
|---|
| 4611 | 4542 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 4612 | | - |
|---|
| 4613 | | - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); |
|---|
| 4614 | | - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); |
|---|
| 4615 | | - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); |
|---|
| 4543 | + int i; |
|---|
| 4616 | 4544 | |
|---|
| 4617 | 4545 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
|---|
| 4618 | 4546 | amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); |
|---|
| .. | .. |
|---|
| 4620 | 4548 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
|---|
| 4621 | 4549 | |
|---|
| 4622 | 4550 | gfx_v7_0_cp_compute_fini(adev); |
|---|
| 4623 | | - gfx_v7_0_rlc_fini(adev); |
|---|
| 4551 | + amdgpu_gfx_rlc_fini(adev); |
|---|
| 4624 | 4552 | gfx_v7_0_mec_fini(adev); |
|---|
| 4625 | 4553 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, |
|---|
| 4626 | 4554 | &adev->gfx.rlc.clear_state_gpu_addr, |
|---|
| .. | .. |
|---|
| 4640 | 4568 | int r; |
|---|
| 4641 | 4569 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 4642 | 4570 | |
|---|
| 4643 | | - gfx_v7_0_gpu_init(adev); |
|---|
| 4571 | + gfx_v7_0_constants_init(adev); |
|---|
| 4644 | 4572 | |
|---|
| 4573 | + /* init CSB */ |
|---|
| 4574 | + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); |
|---|
| 4645 | 4575 | /* init rlc */ |
|---|
| 4646 | | - r = gfx_v7_0_rlc_resume(adev); |
|---|
| 4576 | + r = adev->gfx.rlc.funcs->resume(adev); |
|---|
| 4647 | 4577 | if (r) |
|---|
| 4648 | 4578 | return r; |
|---|
| 4649 | 4579 | |
|---|
| .. | .. |
|---|
| 4661 | 4591 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); |
|---|
| 4662 | 4592 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); |
|---|
| 4663 | 4593 | gfx_v7_0_cp_enable(adev, false); |
|---|
| 4664 | | - gfx_v7_0_rlc_stop(adev); |
|---|
| 4594 | + adev->gfx.rlc.funcs->stop(adev); |
|---|
| 4665 | 4595 | gfx_v7_0_fini_pg(adev); |
|---|
| 4666 | 4596 | |
|---|
| 4667 | 4597 | return 0; |
|---|
| .. | .. |
|---|
| 4746 | 4676 | gfx_v7_0_update_cg(adev, false); |
|---|
| 4747 | 4677 | |
|---|
| 4748 | 4678 | /* stop the rlc */ |
|---|
| 4749 | | - gfx_v7_0_rlc_stop(adev); |
|---|
| 4679 | + adev->gfx.rlc.funcs->stop(adev); |
|---|
| 4750 | 4680 | |
|---|
| 4751 | 4681 | /* Disable GFX parsing/prefetching */ |
|---|
| 4752 | 4682 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); |
|---|
| .. | .. |
|---|
| 4915 | 4845 | enum amdgpu_interrupt_state state) |
|---|
| 4916 | 4846 | { |
|---|
| 4917 | 4847 | switch (type) { |
|---|
| 4918 | | - case AMDGPU_CP_IRQ_GFX_EOP: |
|---|
| 4848 | + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: |
|---|
| 4919 | 4849 | gfx_v7_0_set_gfx_eop_interrupt_state(adev, state); |
|---|
| 4920 | 4850 | break; |
|---|
| 4921 | 4851 | case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: |
|---|
| .. | .. |
|---|
| 4975 | 4905 | return 0; |
|---|
| 4976 | 4906 | } |
|---|
| 4977 | 4907 | |
|---|
| 4908 | +static void gfx_v7_0_fault(struct amdgpu_device *adev, |
|---|
| 4909 | + struct amdgpu_iv_entry *entry) |
|---|
| 4910 | +{ |
|---|
| 4911 | + struct amdgpu_ring *ring; |
|---|
| 4912 | + u8 me_id, pipe_id; |
|---|
| 4913 | + int i; |
|---|
| 4914 | + |
|---|
| 4915 | + me_id = (entry->ring_id & 0x0c) >> 2; |
|---|
| 4916 | + pipe_id = (entry->ring_id & 0x03) >> 0; |
|---|
| 4917 | + switch (me_id) { |
|---|
| 4918 | + case 0: |
|---|
| 4919 | + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); |
|---|
| 4920 | + break; |
|---|
| 4921 | + case 1: |
|---|
| 4922 | + case 2: |
|---|
| 4923 | + for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
|---|
| 4924 | + ring = &adev->gfx.compute_ring[i]; |
|---|
| 4925 | + if ((ring->me == me_id) && (ring->pipe == pipe_id)) |
|---|
| 4926 | + drm_sched_fault(&ring->sched); |
|---|
| 4927 | + } |
|---|
| 4928 | + break; |
|---|
| 4929 | + } |
|---|
| 4930 | +} |
|---|
| 4931 | + |
|---|
| 4978 | 4932 | static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, |
|---|
| 4979 | 4933 | struct amdgpu_irq_src *source, |
|---|
| 4980 | 4934 | struct amdgpu_iv_entry *entry) |
|---|
| 4981 | 4935 | { |
|---|
| 4982 | 4936 | DRM_ERROR("Illegal register access in command stream\n"); |
|---|
| 4983 | | - schedule_work(&adev->reset_work); |
|---|
| 4937 | + gfx_v7_0_fault(adev, entry); |
|---|
| 4984 | 4938 | return 0; |
|---|
| 4985 | 4939 | } |
|---|
| 4986 | 4940 | |
|---|
| .. | .. |
|---|
| 4990 | 4944 | { |
|---|
| 4991 | 4945 | DRM_ERROR("Illegal instruction in command stream\n"); |
|---|
| 4992 | 4946 | // XXX soft reset the gfx block only |
|---|
| 4993 | | - schedule_work(&adev->reset_work); |
|---|
| 4947 | + gfx_v7_0_fault(adev, entry); |
|---|
| 4994 | 4948 | return 0; |
|---|
| 4995 | 4949 | } |
|---|
| 4996 | 4950 | |
|---|
| .. | .. |
|---|
| 5042 | 4996 | return 0; |
|---|
| 5043 | 4997 | } |
|---|
| 5044 | 4998 | |
|---|
| 4999 | +static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring) |
|---|
| 5000 | +{ |
|---|
| 5001 | + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); |
|---|
| 5002 | + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | |
|---|
| 5003 | + PACKET3_TC_ACTION_ENA | |
|---|
| 5004 | + PACKET3_SH_KCACHE_ACTION_ENA | |
|---|
| 5005 | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ |
|---|
| 5006 | + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ |
|---|
| 5007 | + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ |
|---|
| 5008 | + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ |
|---|
| 5009 | +} |
|---|
| 5010 | + |
|---|
| 5011 | +static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) |
|---|
| 5012 | +{ |
|---|
| 5013 | + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); |
|---|
| 5014 | + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | |
|---|
| 5015 | + PACKET3_TC_ACTION_ENA | |
|---|
| 5016 | + PACKET3_SH_KCACHE_ACTION_ENA | |
|---|
| 5017 | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ |
|---|
| 5018 | + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ |
|---|
| 5019 | + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ |
|---|
| 5020 | + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ |
|---|
| 5021 | + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ |
|---|
| 5022 | + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ |
|---|
| 5023 | +} |
|---|
| 5024 | + |
|---|
| 5045 | 5025 | static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { |
|---|
| 5046 | 5026 | .name = "gfx_v7_0", |
|---|
| 5047 | 5027 | .early_init = gfx_v7_0_early_init, |
|---|
| .. | .. |
|---|
| 5074 | 5054 | 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ |
|---|
| 5075 | 5055 | 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ |
|---|
| 5076 | 5056 | CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ |
|---|
| 5077 | | - 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ |
|---|
| 5057 | + 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ |
|---|
| 5058 | + 5, /* SURFACE_SYNC */ |
|---|
| 5078 | 5059 | .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ |
|---|
| 5079 | 5060 | .emit_ib = gfx_v7_0_ring_emit_ib_gfx, |
|---|
| 5080 | 5061 | .emit_fence = gfx_v7_0_ring_emit_fence_gfx, |
|---|
| .. | .. |
|---|
| 5088 | 5069 | .pad_ib = amdgpu_ring_generic_pad_ib, |
|---|
| 5089 | 5070 | .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, |
|---|
| 5090 | 5071 | .emit_wreg = gfx_v7_0_ring_emit_wreg, |
|---|
| 5072 | + .soft_recovery = gfx_v7_0_ring_soft_recovery, |
|---|
| 5073 | + .emit_mem_sync = gfx_v7_0_emit_mem_sync, |
|---|
| 5091 | 5074 | }; |
|---|
| 5092 | 5075 | |
|---|
| 5093 | 5076 | static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { |
|---|
| .. | .. |
|---|
| 5104 | 5087 | 5 + /* hdp invalidate */ |
|---|
| 5105 | 5088 | 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ |
|---|
| 5106 | 5089 | CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ |
|---|
| 5107 | | - 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ |
|---|
| 5108 | | - .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ |
|---|
| 5090 | + 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ |
|---|
| 5091 | + 7, /* gfx_v7_0_emit_mem_sync_compute */ |
|---|
| 5092 | + .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ |
|---|
| 5109 | 5093 | .emit_ib = gfx_v7_0_ring_emit_ib_compute, |
|---|
| 5110 | 5094 | .emit_fence = gfx_v7_0_ring_emit_fence_compute, |
|---|
| 5111 | 5095 | .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, |
|---|
| .. | .. |
|---|
| 5117 | 5101 | .insert_nop = amdgpu_ring_insert_nop, |
|---|
| 5118 | 5102 | .pad_ib = amdgpu_ring_generic_pad_ib, |
|---|
| 5119 | 5103 | .emit_wreg = gfx_v7_0_ring_emit_wreg, |
|---|
| 5104 | + .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, |
|---|
| 5120 | 5105 | }; |
|---|
| 5121 | 5106 | |
|---|
| 5122 | 5107 | static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) |
|---|
| .. | .. |
|---|
| 5159 | 5144 | static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) |
|---|
| 5160 | 5145 | { |
|---|
| 5161 | 5146 | /* init asci gds info */ |
|---|
| 5162 | | - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); |
|---|
| 5163 | | - adev->gds.gws.total_size = 64; |
|---|
| 5164 | | - adev->gds.oa.total_size = 16; |
|---|
| 5165 | | - |
|---|
| 5166 | | - if (adev->gds.mem.total_size == 64 * 1024) { |
|---|
| 5167 | | - adev->gds.mem.gfx_partition_size = 4096; |
|---|
| 5168 | | - adev->gds.mem.cs_partition_size = 4096; |
|---|
| 5169 | | - |
|---|
| 5170 | | - adev->gds.gws.gfx_partition_size = 4; |
|---|
| 5171 | | - adev->gds.gws.cs_partition_size = 4; |
|---|
| 5172 | | - |
|---|
| 5173 | | - adev->gds.oa.gfx_partition_size = 4; |
|---|
| 5174 | | - adev->gds.oa.cs_partition_size = 1; |
|---|
| 5175 | | - } else { |
|---|
| 5176 | | - adev->gds.mem.gfx_partition_size = 1024; |
|---|
| 5177 | | - adev->gds.mem.cs_partition_size = 1024; |
|---|
| 5178 | | - |
|---|
| 5179 | | - adev->gds.gws.gfx_partition_size = 16; |
|---|
| 5180 | | - adev->gds.gws.cs_partition_size = 16; |
|---|
| 5181 | | - |
|---|
| 5182 | | - adev->gds.oa.gfx_partition_size = 4; |
|---|
| 5183 | | - adev->gds.oa.cs_partition_size = 4; |
|---|
| 5184 | | - } |
|---|
| 5147 | + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); |
|---|
| 5148 | + adev->gds.gws_size = 64; |
|---|
| 5149 | + adev->gds.oa_size = 16; |
|---|
| 5150 | + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); |
|---|
| 5185 | 5151 | } |
|---|
| 5186 | 5152 | |
|---|
| 5187 | 5153 | |
|---|
| .. | .. |
|---|
| 5241 | 5207 | cu_info->lds_size = 64; |
|---|
| 5242 | 5208 | } |
|---|
| 5243 | 5209 | |
|---|
| 5244 | | -const struct amdgpu_ip_block_version gfx_v7_0_ip_block = |
|---|
| 5210 | +static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = |
|---|
| 5245 | 5211 | { |
|---|
| 5246 | 5212 | .type = AMD_IP_BLOCK_TYPE_GFX, |
|---|
| 5247 | 5213 | .major = 7, |
|---|