.. | .. |
---|
20 | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
---|
21 | 21 | * |
---|
22 | 22 | */ |
---|
| 23 | + |
---|
23 | 24 | #include <linux/firmware.h> |
---|
24 | | -#include <drm/drmP.h> |
---|
| 25 | +#include <linux/module.h> |
---|
| 26 | + |
---|
25 | 27 | #include "amdgpu.h" |
---|
26 | 28 | #include "amdgpu_ih.h" |
---|
27 | 29 | #include "amdgpu_gfx.h" |
---|
.. | .. |
---|
882 | 884 | |
---|
883 | 885 | static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); |
---|
884 | 886 | static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); |
---|
885 | | -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); |
---|
886 | 887 | static void gfx_v7_0_init_pg(struct amdgpu_device *adev); |
---|
887 | 888 | static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); |
---|
888 | 889 | |
---|
.. | .. |
---|
1849 | 1850 | * |
---|
1850 | 1851 | */ |
---|
1851 | 1852 | #define DEFAULT_SH_MEM_BASES (0x6000) |
---|
1852 | | -#define FIRST_COMPUTE_VMID (8) |
---|
1853 | | -#define LAST_COMPUTE_VMID (16) |
---|
1854 | 1853 | static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) |
---|
1855 | 1854 | { |
---|
1856 | 1855 | int i; |
---|
.. | .. |
---|
1868 | 1867 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; |
---|
1869 | 1868 | sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; |
---|
1870 | 1869 | mutex_lock(&adev->srbm_mutex); |
---|
1871 | | - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { |
---|
| 1870 | + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { |
---|
1872 | 1871 | cik_srbm_select(adev, 0, 0, 0, i); |
---|
1873 | 1872 | /* CP and shaders */ |
---|
1874 | 1873 | WREG32(mmSH_MEM_CONFIG, sh_mem_config); |
---|
.. | .. |
---|
1878 | 1877 | } |
---|
1879 | 1878 | cik_srbm_select(adev, 0, 0, 0, 0); |
---|
1880 | 1879 | mutex_unlock(&adev->srbm_mutex); |
---|
| 1880 | + |
---|
| 1881 | + /* Initialize all compute VMIDs to have no GDS, GWS, or OA |
---|
| 1882 | + access. These should be enabled by FW for target VMIDs. */ |
---|
| 1883 | + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { |
---|
| 1884 | + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); |
---|
| 1885 | + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); |
---|
| 1886 | + WREG32(amdgpu_gds_reg_offset[i].gws, 0); |
---|
| 1887 | + WREG32(amdgpu_gds_reg_offset[i].oa, 0); |
---|
| 1888 | + } |
---|
| 1889 | +} |
---|
| 1890 | + |
---|
| 1891 | +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) |
---|
| 1892 | +{ |
---|
| 1893 | + int vmid; |
---|
| 1894 | + |
---|
| 1895 | + /* |
---|
| 1896 | + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA |
---|
| 1897 | + * access. Compute VMIDs should be enabled by FW for target VMIDs; |
---|
| 1898 | + * the driver can enable them for graphics. VMID0 should maintain |
---|
| 1899 | + * access so that HWS firmware can save/restore entries. |
---|
| 1900 | + */ |
---|
| 1901 | + for (vmid = 1; vmid < 16; vmid++) { |
---|
| 1902 | + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); |
---|
| 1903 | + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); |
---|
| 1904 | + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); |
---|
| 1905 | + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); |
---|
| 1906 | + } |
---|
1881 | 1907 | } |
---|
1882 | 1908 | |
---|
1883 | 1909 | static void gfx_v7_0_config_init(struct amdgpu_device *adev) |
---|
.. | .. |
---|
1886 | 1912 | } |
---|
1887 | 1913 | |
---|
1888 | 1914 | /** |
---|
1889 | | - * gfx_v7_0_gpu_init - setup the 3D engine |
---|
| 1915 | + * gfx_v7_0_constants_init - setup the 3D engine |
---|
1890 | 1916 | * |
---|
1891 | 1917 | * @adev: amdgpu_device pointer |
---|
1892 | 1918 | * |
---|
1893 | | - * Configures the 3D engine and tiling configuration |
---|
1894 | | - * registers so that the 3D engine is usable. |
---|
| 1919 | + * Initialize the gfx constants such as the 3D engine, tiling configuration |
---|
| 1920 | + * registers, maximum number of quad pipes, render backends, etc. |
---|
1895 | 1921 | */ |
---|
1896 | | -static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) |
---|
| 1922 | +static void gfx_v7_0_constants_init(struct amdgpu_device *adev) |
---|
1897 | 1923 | { |
---|
1898 | 1924 | u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; |
---|
1899 | 1925 | u32 tmp; |
---|
.. | .. |
---|
1958 | 1984 | mutex_unlock(&adev->srbm_mutex); |
---|
1959 | 1985 | |
---|
1960 | 1986 | gfx_v7_0_init_compute_vmid(adev); |
---|
| 1987 | + gfx_v7_0_init_gds_vmid(adev); |
---|
1961 | 1988 | |
---|
1962 | 1989 | WREG32(mmSX_DEBUG_1, 0x20); |
---|
1963 | 1990 | |
---|
.. | .. |
---|
2064 | 2091 | int r; |
---|
2065 | 2092 | |
---|
2066 | 2093 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
---|
2067 | | - if (r) { |
---|
2068 | | - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); |
---|
| 2094 | + if (r) |
---|
2069 | 2095 | return r; |
---|
2070 | | - } |
---|
| 2096 | + |
---|
2071 | 2097 | WREG32(scratch, 0xCAFEDEAD); |
---|
2072 | 2098 | r = amdgpu_ring_alloc(ring, 3); |
---|
2073 | | - if (r) { |
---|
2074 | | - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); |
---|
2075 | | - amdgpu_gfx_scratch_free(adev, scratch); |
---|
2076 | | - return r; |
---|
2077 | | - } |
---|
| 2099 | + if (r) |
---|
| 2100 | + goto error_free_scratch; |
---|
| 2101 | + |
---|
2078 | 2102 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
---|
2079 | 2103 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
---|
2080 | 2104 | amdgpu_ring_write(ring, 0xDEADBEEF); |
---|
.. | .. |
---|
2084 | 2108 | tmp = RREG32(scratch); |
---|
2085 | 2109 | if (tmp == 0xDEADBEEF) |
---|
2086 | 2110 | break; |
---|
2087 | | - DRM_UDELAY(1); |
---|
| 2111 | + udelay(1); |
---|
2088 | 2112 | } |
---|
2089 | | - if (i < adev->usec_timeout) { |
---|
2090 | | - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); |
---|
2091 | | - } else { |
---|
2092 | | - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", |
---|
2093 | | - ring->idx, scratch, tmp); |
---|
2094 | | - r = -EINVAL; |
---|
2095 | | - } |
---|
| 2113 | + if (i >= adev->usec_timeout) |
---|
| 2114 | + r = -ETIMEDOUT; |
---|
| 2115 | + |
---|
| 2116 | +error_free_scratch: |
---|
2096 | 2117 | amdgpu_gfx_scratch_free(adev, scratch); |
---|
2097 | 2118 | return r; |
---|
2098 | 2119 | } |
---|
.. | .. |
---|
2233 | 2254 | * on the gfx ring for execution by the GPU. |
---|
2234 | 2255 | */ |
---|
2235 | 2256 | static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
---|
2236 | | - struct amdgpu_ib *ib, |
---|
2237 | | - unsigned vmid, bool ctx_switch) |
---|
| 2257 | + struct amdgpu_job *job, |
---|
| 2258 | + struct amdgpu_ib *ib, |
---|
| 2259 | + uint32_t flags) |
---|
2238 | 2260 | { |
---|
| 2261 | + unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
---|
2239 | 2262 | u32 header, control = 0; |
---|
2240 | 2263 | |
---|
2241 | 2264 | /* insert SWITCH_BUFFER packet before first IB in the ring frame */ |
---|
2242 | | - if (ctx_switch) { |
---|
| 2265 | + if (flags & AMDGPU_HAVE_CTX_SWITCH) { |
---|
2243 | 2266 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); |
---|
2244 | 2267 | amdgpu_ring_write(ring, 0); |
---|
2245 | 2268 | } |
---|
.. | .. |
---|
2262 | 2285 | } |
---|
2263 | 2286 | |
---|
2264 | 2287 | static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
---|
| 2288 | + struct amdgpu_job *job, |
---|
2265 | 2289 | struct amdgpu_ib *ib, |
---|
2266 | | - unsigned vmid, bool ctx_switch) |
---|
| 2290 | + uint32_t flags) |
---|
2267 | 2291 | { |
---|
| 2292 | + unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
---|
2268 | 2293 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); |
---|
| 2294 | + |
---|
| 2295 | + /* Currently, there is a high possibility to get wave ID mismatch |
---|
| 2296 | + * between ME and GDS, leading to a hw deadlock, because ME generates |
---|
| 2297 | + * different wave IDs than the GDS expects. This situation happens |
---|
| 2298 | + * randomly when at least 5 compute pipes use GDS ordered append. |
---|
| 2299 | + * The wave IDs generated by ME are also wrong after suspend/resume. |
---|
| 2300 | + * Those are probably bugs somewhere else in the kernel driver. |
---|
| 2301 | + * |
---|
| 2302 | + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and |
---|
| 2303 | + * GDS to 0 for this ring (me/pipe). |
---|
| 2304 | + */ |
---|
| 2305 | + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { |
---|
| 2306 | + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); |
---|
| 2307 | + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); |
---|
| 2308 | + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); |
---|
| 2309 | + } |
---|
2269 | 2310 | |
---|
2270 | 2311 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); |
---|
2271 | 2312 | amdgpu_ring_write(ring, |
---|
.. | .. |
---|
2316 | 2357 | long r; |
---|
2317 | 2358 | |
---|
2318 | 2359 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
---|
2319 | | - if (r) { |
---|
2320 | | - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); |
---|
| 2360 | + if (r) |
---|
2321 | 2361 | return r; |
---|
2322 | | - } |
---|
| 2362 | + |
---|
2323 | 2363 | WREG32(scratch, 0xCAFEDEAD); |
---|
2324 | 2364 | memset(&ib, 0, sizeof(ib)); |
---|
2325 | | - r = amdgpu_ib_get(adev, NULL, 256, &ib); |
---|
2326 | | - if (r) { |
---|
2327 | | - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); |
---|
| 2365 | + r = amdgpu_ib_get(adev, NULL, 256, |
---|
| 2366 | + AMDGPU_IB_POOL_DIRECT, &ib); |
---|
| 2367 | + if (r) |
---|
2328 | 2368 | goto err1; |
---|
2329 | | - } |
---|
| 2369 | + |
---|
2330 | 2370 | ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); |
---|
2331 | 2371 | ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); |
---|
2332 | 2372 | ib.ptr[2] = 0xDEADBEEF; |
---|
.. | .. |
---|
2338 | 2378 | |
---|
2339 | 2379 | r = dma_fence_wait_timeout(f, false, timeout); |
---|
2340 | 2380 | if (r == 0) { |
---|
2341 | | - DRM_ERROR("amdgpu: IB test timed out\n"); |
---|
2342 | 2381 | r = -ETIMEDOUT; |
---|
2343 | 2382 | goto err2; |
---|
2344 | 2383 | } else if (r < 0) { |
---|
2345 | | - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); |
---|
2346 | 2384 | goto err2; |
---|
2347 | 2385 | } |
---|
2348 | 2386 | tmp = RREG32(scratch); |
---|
2349 | | - if (tmp == 0xDEADBEEF) { |
---|
2350 | | - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); |
---|
| 2387 | + if (tmp == 0xDEADBEEF) |
---|
2351 | 2388 | r = 0; |
---|
2352 | | - } else { |
---|
2353 | | - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", |
---|
2354 | | - scratch, tmp); |
---|
| 2389 | + else |
---|
2355 | 2390 | r = -EINVAL; |
---|
2356 | | - } |
---|
2357 | 2391 | |
---|
2358 | 2392 | err2: |
---|
2359 | 2393 | amdgpu_ib_free(adev, &ib, NULL); |
---|
.. | .. |
---|
2396 | 2430 | */ |
---|
2397 | 2431 | static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) |
---|
2398 | 2432 | { |
---|
2399 | | - int i; |
---|
2400 | | - |
---|
2401 | | - if (enable) { |
---|
| 2433 | + if (enable) |
---|
2402 | 2434 | WREG32(mmCP_ME_CNTL, 0); |
---|
2403 | | - } else { |
---|
2404 | | - WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); |
---|
2405 | | - for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
---|
2406 | | - adev->gfx.gfx_ring[i].ready = false; |
---|
2407 | | - } |
---|
| 2435 | + else |
---|
| 2436 | + WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | |
---|
| 2437 | + CP_ME_CNTL__PFP_HALT_MASK | |
---|
| 2438 | + CP_ME_CNTL__CE_HALT_MASK)); |
---|
2408 | 2439 | udelay(50); |
---|
2409 | 2440 | } |
---|
2410 | 2441 | |
---|
.. | .. |
---|
2613 | 2644 | |
---|
2614 | 2645 | /* start the ring */ |
---|
2615 | 2646 | gfx_v7_0_cp_gfx_start(adev); |
---|
2616 | | - ring->ready = true; |
---|
2617 | | - r = amdgpu_ring_test_ring(ring); |
---|
2618 | | - if (r) { |
---|
2619 | | - ring->ready = false; |
---|
| 2647 | + r = amdgpu_ring_test_helper(ring); |
---|
| 2648 | + if (r) |
---|
2620 | 2649 | return r; |
---|
2621 | | - } |
---|
2622 | 2650 | |
---|
2623 | 2651 | return 0; |
---|
2624 | 2652 | } |
---|
.. | .. |
---|
2668 | 2696 | */ |
---|
2669 | 2697 | static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) |
---|
2670 | 2698 | { |
---|
2671 | | - int i; |
---|
2672 | | - |
---|
2673 | | - if (enable) { |
---|
| 2699 | + if (enable) |
---|
2674 | 2700 | WREG32(mmCP_MEC_CNTL, 0); |
---|
2675 | | - } else { |
---|
2676 | | - WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
---|
2677 | | - for (i = 0; i < adev->gfx.num_compute_rings; i++) |
---|
2678 | | - adev->gfx.compute_ring[i].ready = false; |
---|
2679 | | - } |
---|
| 2701 | + else |
---|
| 2702 | + WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | |
---|
| 2703 | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
---|
2680 | 2704 | udelay(50); |
---|
2681 | 2705 | } |
---|
2682 | 2706 | |
---|
.. | .. |
---|
2781 | 2805 | * GFX7_MEC_HPD_SIZE * 2; |
---|
2782 | 2806 | |
---|
2783 | 2807 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, |
---|
2784 | | - AMDGPU_GEM_DOMAIN_GTT, |
---|
| 2808 | + AMDGPU_GEM_DOMAIN_VRAM, |
---|
2785 | 2809 | &adev->gfx.mec.hpd_eop_obj, |
---|
2786 | 2810 | &adev->gfx.mec.hpd_eop_gpu_addr, |
---|
2787 | 2811 | (void **)&hpd); |
---|
.. | .. |
---|
3013 | 3037 | mqd->cp_hqd_active = 1; |
---|
3014 | 3038 | } |
---|
3015 | 3039 | |
---|
3016 | | -int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) |
---|
| 3040 | +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) |
---|
3017 | 3041 | { |
---|
3018 | 3042 | uint32_t tmp; |
---|
3019 | 3043 | uint32_t mqd_reg; |
---|
.. | .. |
---|
3106 | 3130 | |
---|
3107 | 3131 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
---|
3108 | 3132 | ring = &adev->gfx.compute_ring[i]; |
---|
3109 | | - ring->ready = true; |
---|
3110 | | - r = amdgpu_ring_test_ring(ring); |
---|
3111 | | - if (r) |
---|
3112 | | - ring->ready = false; |
---|
| 3133 | + amdgpu_ring_test_helper(ring); |
---|
3113 | 3134 | } |
---|
3114 | 3135 | |
---|
3115 | 3136 | return 0; |
---|
.. | .. |
---|
3268 | 3289 | * The RLC is a multi-purpose microengine that handles a |
---|
3269 | 3290 | * variety of functions. |
---|
3270 | 3291 | */ |
---|
3271 | | -static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) |
---|
3272 | | -{ |
---|
3273 | | - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); |
---|
3274 | | - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); |
---|
3275 | | - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); |
---|
3276 | | -} |
---|
3277 | | - |
---|
3278 | 3292 | static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) |
---|
3279 | 3293 | { |
---|
3280 | 3294 | const u32 *src_ptr; |
---|
3281 | | - volatile u32 *dst_ptr; |
---|
3282 | | - u32 dws, i; |
---|
| 3295 | + u32 dws; |
---|
3283 | 3296 | const struct cs_section_def *cs_data; |
---|
3284 | 3297 | int r; |
---|
3285 | 3298 | |
---|
.. | .. |
---|
3306 | 3319 | cs_data = adev->gfx.rlc.cs_data; |
---|
3307 | 3320 | |
---|
3308 | 3321 | if (src_ptr) { |
---|
3309 | | - /* save restore block */ |
---|
3310 | | - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, |
---|
3311 | | - AMDGPU_GEM_DOMAIN_VRAM, |
---|
3312 | | - &adev->gfx.rlc.save_restore_obj, |
---|
3313 | | - &adev->gfx.rlc.save_restore_gpu_addr, |
---|
3314 | | - (void **)&adev->gfx.rlc.sr_ptr); |
---|
3315 | | - if (r) { |
---|
3316 | | - dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); |
---|
3317 | | - gfx_v7_0_rlc_fini(adev); |
---|
| 3322 | + /* init save restore block */ |
---|
| 3323 | + r = amdgpu_gfx_rlc_init_sr(adev, dws); |
---|
| 3324 | + if (r) |
---|
3318 | 3325 | return r; |
---|
3319 | | - } |
---|
3320 | | - |
---|
3321 | | - /* write the sr buffer */ |
---|
3322 | | - dst_ptr = adev->gfx.rlc.sr_ptr; |
---|
3323 | | - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) |
---|
3324 | | - dst_ptr[i] = cpu_to_le32(src_ptr[i]); |
---|
3325 | | - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); |
---|
3326 | | - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); |
---|
3327 | 3326 | } |
---|
3328 | 3327 | |
---|
3329 | 3328 | if (cs_data) { |
---|
3330 | | - /* clear state block */ |
---|
3331 | | - adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); |
---|
3332 | | - |
---|
3333 | | - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, |
---|
3334 | | - AMDGPU_GEM_DOMAIN_VRAM, |
---|
3335 | | - &adev->gfx.rlc.clear_state_obj, |
---|
3336 | | - &adev->gfx.rlc.clear_state_gpu_addr, |
---|
3337 | | - (void **)&adev->gfx.rlc.cs_ptr); |
---|
3338 | | - if (r) { |
---|
3339 | | - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); |
---|
3340 | | - gfx_v7_0_rlc_fini(adev); |
---|
| 3329 | + /* init clear state block */ |
---|
| 3330 | + r = amdgpu_gfx_rlc_init_csb(adev); |
---|
| 3331 | + if (r) |
---|
3341 | 3332 | return r; |
---|
3342 | | - } |
---|
3343 | | - |
---|
3344 | | - /* set up the cs buffer */ |
---|
3345 | | - dst_ptr = adev->gfx.rlc.cs_ptr; |
---|
3346 | | - gfx_v7_0_get_csb_buffer(adev, dst_ptr); |
---|
3347 | | - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); |
---|
3348 | | - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); |
---|
3349 | 3333 | } |
---|
3350 | 3334 | |
---|
3351 | 3335 | if (adev->gfx.rlc.cp_table_size) { |
---|
3352 | | - |
---|
3353 | | - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, |
---|
3354 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, |
---|
3355 | | - &adev->gfx.rlc.cp_table_obj, |
---|
3356 | | - &adev->gfx.rlc.cp_table_gpu_addr, |
---|
3357 | | - (void **)&adev->gfx.rlc.cp_table_ptr); |
---|
3358 | | - if (r) { |
---|
3359 | | - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); |
---|
3360 | | - gfx_v7_0_rlc_fini(adev); |
---|
| 3336 | + r = amdgpu_gfx_rlc_init_cpt(adev); |
---|
| 3337 | + if (r) |
---|
3361 | 3338 | return r; |
---|
3362 | | - } |
---|
3363 | | - |
---|
3364 | | - gfx_v7_0_init_cp_pg_table(adev); |
---|
3365 | | - |
---|
3366 | | - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); |
---|
3367 | | - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); |
---|
3368 | | - |
---|
3369 | 3339 | } |
---|
| 3340 | + |
---|
| 3341 | + /* init spm vmid with 0xf */ |
---|
| 3342 | + if (adev->gfx.rlc.funcs->update_spm_vmid) |
---|
| 3343 | + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); |
---|
3370 | 3344 | |
---|
3371 | 3345 | return 0; |
---|
3372 | 3346 | } |
---|
.. | .. |
---|
3446 | 3420 | return orig; |
---|
3447 | 3421 | } |
---|
3448 | 3422 | |
---|
3449 | | -static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) |
---|
| 3423 | +static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) |
---|
| 3424 | +{ |
---|
| 3425 | + return true; |
---|
| 3426 | +} |
---|
| 3427 | + |
---|
| 3428 | +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) |
---|
3450 | 3429 | { |
---|
3451 | 3430 | u32 tmp, i, mask; |
---|
3452 | 3431 | |
---|
.. | .. |
---|
3468 | 3447 | } |
---|
3469 | 3448 | } |
---|
3470 | 3449 | |
---|
3471 | | -static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) |
---|
| 3450 | +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) |
---|
3472 | 3451 | { |
---|
3473 | 3452 | u32 tmp; |
---|
3474 | 3453 | |
---|
.. | .. |
---|
3545 | 3524 | adev->gfx.rlc_feature_version = le32_to_cpu( |
---|
3546 | 3525 | hdr->ucode_feature_version); |
---|
3547 | 3526 | |
---|
3548 | | - gfx_v7_0_rlc_stop(adev); |
---|
| 3527 | + adev->gfx.rlc.funcs->stop(adev); |
---|
3549 | 3528 | |
---|
3550 | 3529 | /* disable CG */ |
---|
3551 | 3530 | tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; |
---|
3552 | 3531 | WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); |
---|
3553 | 3532 | |
---|
3554 | | - gfx_v7_0_rlc_reset(adev); |
---|
| 3533 | + adev->gfx.rlc.funcs->reset(adev); |
---|
3555 | 3534 | |
---|
3556 | 3535 | gfx_v7_0_init_pg(adev); |
---|
3557 | 3536 | |
---|
.. | .. |
---|
3582 | 3561 | if (adev->asic_type == CHIP_BONAIRE) |
---|
3583 | 3562 | WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); |
---|
3584 | 3563 | |
---|
3585 | | - gfx_v7_0_rlc_start(adev); |
---|
| 3564 | + adev->gfx.rlc.funcs->start(adev); |
---|
3586 | 3565 | |
---|
3587 | 3566 | return 0; |
---|
| 3567 | +} |
---|
| 3568 | + |
---|
| 3569 | +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) |
---|
| 3570 | +{ |
---|
| 3571 | + u32 data; |
---|
| 3572 | + |
---|
| 3573 | + data = RREG32(mmRLC_SPM_VMID); |
---|
| 3574 | + |
---|
| 3575 | + data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; |
---|
| 3576 | + data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; |
---|
| 3577 | + |
---|
| 3578 | + WREG32(mmRLC_SPM_VMID, data); |
---|
3588 | 3579 | } |
---|
3589 | 3580 | |
---|
3590 | 3581 | static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) |
---|
.. | .. |
---|
3784 | 3775 | WREG32(mmRLC_PG_CNTL, data); |
---|
3785 | 3776 | } |
---|
3786 | 3777 | |
---|
3787 | | -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) |
---|
| 3778 | +static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev) |
---|
3788 | 3779 | { |
---|
3789 | | - const __le32 *fw_data; |
---|
3790 | | - volatile u32 *dst_ptr; |
---|
3791 | | - int me, i, max_me = 4; |
---|
3792 | | - u32 bo_offset = 0; |
---|
3793 | | - u32 table_offset, table_size; |
---|
3794 | | - |
---|
3795 | 3780 | if (adev->asic_type == CHIP_KAVERI) |
---|
3796 | | - max_me = 5; |
---|
3797 | | - |
---|
3798 | | - if (adev->gfx.rlc.cp_table_ptr == NULL) |
---|
3799 | | - return; |
---|
3800 | | - |
---|
3801 | | - /* write the cp table buffer */ |
---|
3802 | | - dst_ptr = adev->gfx.rlc.cp_table_ptr; |
---|
3803 | | - for (me = 0; me < max_me; me++) { |
---|
3804 | | - if (me == 0) { |
---|
3805 | | - const struct gfx_firmware_header_v1_0 *hdr = |
---|
3806 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; |
---|
3807 | | - fw_data = (const __le32 *) |
---|
3808 | | - (adev->gfx.ce_fw->data + |
---|
3809 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
---|
3810 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
---|
3811 | | - table_size = le32_to_cpu(hdr->jt_size); |
---|
3812 | | - } else if (me == 1) { |
---|
3813 | | - const struct gfx_firmware_header_v1_0 *hdr = |
---|
3814 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; |
---|
3815 | | - fw_data = (const __le32 *) |
---|
3816 | | - (adev->gfx.pfp_fw->data + |
---|
3817 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
---|
3818 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
---|
3819 | | - table_size = le32_to_cpu(hdr->jt_size); |
---|
3820 | | - } else if (me == 2) { |
---|
3821 | | - const struct gfx_firmware_header_v1_0 *hdr = |
---|
3822 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; |
---|
3823 | | - fw_data = (const __le32 *) |
---|
3824 | | - (adev->gfx.me_fw->data + |
---|
3825 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
---|
3826 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
---|
3827 | | - table_size = le32_to_cpu(hdr->jt_size); |
---|
3828 | | - } else if (me == 3) { |
---|
3829 | | - const struct gfx_firmware_header_v1_0 *hdr = |
---|
3830 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; |
---|
3831 | | - fw_data = (const __le32 *) |
---|
3832 | | - (adev->gfx.mec_fw->data + |
---|
3833 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
---|
3834 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
---|
3835 | | - table_size = le32_to_cpu(hdr->jt_size); |
---|
3836 | | - } else { |
---|
3837 | | - const struct gfx_firmware_header_v1_0 *hdr = |
---|
3838 | | - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; |
---|
3839 | | - fw_data = (const __le32 *) |
---|
3840 | | - (adev->gfx.mec2_fw->data + |
---|
3841 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
---|
3842 | | - table_offset = le32_to_cpu(hdr->jt_offset); |
---|
3843 | | - table_size = le32_to_cpu(hdr->jt_size); |
---|
3844 | | - } |
---|
3845 | | - |
---|
3846 | | - for (i = 0; i < table_size; i ++) { |
---|
3847 | | - dst_ptr[bo_offset + i] = |
---|
3848 | | - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); |
---|
3849 | | - } |
---|
3850 | | - |
---|
3851 | | - bo_offset += table_size; |
---|
3852 | | - } |
---|
| 3781 | + return 5; |
---|
| 3782 | + else |
---|
| 3783 | + return 4; |
---|
3853 | 3784 | } |
---|
3854 | 3785 | |
---|
3855 | 3786 | static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, |
---|
.. | .. |
---|
4170 | 4101 | uint32_t gws_base, uint32_t gws_size, |
---|
4171 | 4102 | uint32_t oa_base, uint32_t oa_size) |
---|
4172 | 4103 | { |
---|
4173 | | - gds_base = gds_base >> AMDGPU_GDS_SHIFT; |
---|
4174 | | - gds_size = gds_size >> AMDGPU_GDS_SHIFT; |
---|
4175 | | - |
---|
4176 | | - gws_base = gws_base >> AMDGPU_GWS_SHIFT; |
---|
4177 | | - gws_size = gws_size >> AMDGPU_GWS_SHIFT; |
---|
4178 | | - |
---|
4179 | | - oa_base = oa_base >> AMDGPU_OA_SHIFT; |
---|
4180 | | - oa_size = oa_size >> AMDGPU_OA_SHIFT; |
---|
4181 | | - |
---|
4182 | 4104 | /* GDS Base */ |
---|
4183 | 4105 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
---|
4184 | 4106 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | |
---|
.. | .. |
---|
4210 | 4132 | amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); |
---|
4211 | 4133 | amdgpu_ring_write(ring, 0); |
---|
4212 | 4134 | amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); |
---|
| 4135 | +} |
---|
| 4136 | + |
---|
| 4137 | +static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) |
---|
| 4138 | +{ |
---|
| 4139 | + struct amdgpu_device *adev = ring->adev; |
---|
| 4140 | + uint32_t value = 0; |
---|
| 4141 | + |
---|
| 4142 | + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); |
---|
| 4143 | + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); |
---|
| 4144 | + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); |
---|
| 4145 | + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); |
---|
| 4146 | + WREG32(mmSQ_CMD, value); |
---|
4213 | 4147 | } |
---|
4214 | 4148 | |
---|
4215 | 4149 | static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) |
---|
.. | .. |
---|
4271 | 4205 | } |
---|
4272 | 4206 | |
---|
4273 | 4207 | static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, |
---|
4274 | | - u32 me, u32 pipe, u32 q) |
---|
| 4208 | + u32 me, u32 pipe, u32 q, u32 vm) |
---|
4275 | 4209 | { |
---|
4276 | | - cik_srbm_select(adev, me, pipe, q, 0); |
---|
| 4210 | + cik_srbm_select(adev, me, pipe, q, vm); |
---|
4277 | 4211 | } |
---|
4278 | 4212 | |
---|
4279 | 4213 | static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { |
---|
.. | .. |
---|
4285 | 4219 | }; |
---|
4286 | 4220 | |
---|
4287 | 4221 | static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { |
---|
4288 | | - .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, |
---|
4289 | | - .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode |
---|
| 4222 | + .is_rlc_enabled = gfx_v7_0_is_rlc_enabled, |
---|
| 4223 | + .set_safe_mode = gfx_v7_0_set_safe_mode, |
---|
| 4224 | + .unset_safe_mode = gfx_v7_0_unset_safe_mode, |
---|
| 4225 | + .init = gfx_v7_0_rlc_init, |
---|
| 4226 | + .get_csb_size = gfx_v7_0_get_csb_size, |
---|
| 4227 | + .get_csb_buffer = gfx_v7_0_get_csb_buffer, |
---|
| 4228 | + .get_cp_table_num = gfx_v7_0_cp_pg_table_num, |
---|
| 4229 | + .resume = gfx_v7_0_rlc_resume, |
---|
| 4230 | + .stop = gfx_v7_0_rlc_stop, |
---|
| 4231 | + .reset = gfx_v7_0_rlc_reset, |
---|
| 4232 | + .start = gfx_v7_0_rlc_start, |
---|
| 4233 | + .update_spm_vmid = gfx_v7_0_update_spm_vmid |
---|
4290 | 4234 | }; |
---|
4291 | 4235 | |
---|
4292 | 4236 | static int gfx_v7_0_early_init(void *handle) |
---|
.. | .. |
---|
4323 | 4267 | static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) |
---|
4324 | 4268 | { |
---|
4325 | 4269 | u32 gb_addr_config; |
---|
4326 | | - u32 mc_shared_chmap, mc_arb_ramcfg; |
---|
| 4270 | + u32 mc_arb_ramcfg; |
---|
4327 | 4271 | u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; |
---|
4328 | 4272 | u32 tmp; |
---|
4329 | 4273 | |
---|
.. | .. |
---|
4400 | 4344 | break; |
---|
4401 | 4345 | } |
---|
4402 | 4346 | |
---|
4403 | | - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); |
---|
4404 | 4347 | adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); |
---|
4405 | 4348 | mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; |
---|
| 4349 | + |
---|
| 4350 | + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, |
---|
| 4351 | + MC_ARB_RAMCFG, NOOFBANK); |
---|
| 4352 | + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, |
---|
| 4353 | + MC_ARB_RAMCFG, NOOFRANKS); |
---|
4406 | 4354 | |
---|
4407 | 4355 | adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; |
---|
4408 | 4356 | adev->gfx.config.mem_max_burst_length_bytes = 256; |
---|
.. | .. |
---|
4474 | 4422 | |
---|
4475 | 4423 | ring->ring_obj = NULL; |
---|
4476 | 4424 | ring->use_doorbell = true; |
---|
4477 | | - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; |
---|
| 4425 | + ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; |
---|
4478 | 4426 | sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); |
---|
4479 | 4427 | |
---|
4480 | 4428 | irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP |
---|
.. | .. |
---|
4483 | 4431 | |
---|
4484 | 4432 | /* type-2 packets are deprecated on MEC, use type-3 instead */ |
---|
4485 | 4433 | r = amdgpu_ring_init(adev, ring, 1024, |
---|
4486 | | - &adev->gfx.eop_irq, irq_type); |
---|
| 4434 | + &adev->gfx.eop_irq, irq_type, |
---|
| 4435 | + AMDGPU_RING_PRIO_DEFAULT); |
---|
4487 | 4436 | if (r) |
---|
4488 | 4437 | return r; |
---|
4489 | 4438 | |
---|
.. | .. |
---|
4513 | 4462 | adev->gfx.mec.num_queue_per_pipe = 8; |
---|
4514 | 4463 | |
---|
4515 | 4464 | /* EOP Event */ |
---|
4516 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); |
---|
| 4465 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); |
---|
4517 | 4466 | if (r) |
---|
4518 | 4467 | return r; |
---|
4519 | 4468 | |
---|
4520 | 4469 | /* Privileged reg */ |
---|
4521 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, |
---|
| 4470 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, |
---|
4522 | 4471 | &adev->gfx.priv_reg_irq); |
---|
4523 | 4472 | if (r) |
---|
4524 | 4473 | return r; |
---|
4525 | 4474 | |
---|
4526 | 4475 | /* Privileged inst */ |
---|
4527 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, |
---|
| 4476 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, |
---|
4528 | 4477 | &adev->gfx.priv_inst_irq); |
---|
4529 | 4478 | if (r) |
---|
4530 | 4479 | return r; |
---|
.. | .. |
---|
4537 | 4486 | return r; |
---|
4538 | 4487 | } |
---|
4539 | 4488 | |
---|
4540 | | - r = gfx_v7_0_rlc_init(adev); |
---|
| 4489 | + r = adev->gfx.rlc.funcs->init(adev); |
---|
4541 | 4490 | if (r) { |
---|
4542 | 4491 | DRM_ERROR("Failed to init rlc BOs!\n"); |
---|
4543 | 4492 | return r; |
---|
.. | .. |
---|
4555 | 4504 | ring->ring_obj = NULL; |
---|
4556 | 4505 | sprintf(ring->name, "gfx"); |
---|
4557 | 4506 | r = amdgpu_ring_init(adev, ring, 1024, |
---|
4558 | | - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); |
---|
| 4507 | + &adev->gfx.eop_irq, |
---|
| 4508 | + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, |
---|
| 4509 | + AMDGPU_RING_PRIO_DEFAULT); |
---|
4559 | 4510 | if (r) |
---|
4560 | 4511 | return r; |
---|
4561 | 4512 | } |
---|
.. | .. |
---|
4579 | 4530 | } |
---|
4580 | 4531 | } |
---|
4581 | 4532 | |
---|
4582 | | - /* reserve GDS, GWS and OA resource for gfx */ |
---|
4583 | | - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, |
---|
4584 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, |
---|
4585 | | - &adev->gds.gds_gfx_bo, NULL, NULL); |
---|
4586 | | - if (r) |
---|
4587 | | - return r; |
---|
4588 | | - |
---|
4589 | | - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, |
---|
4590 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, |
---|
4591 | | - &adev->gds.gws_gfx_bo, NULL, NULL); |
---|
4592 | | - if (r) |
---|
4593 | | - return r; |
---|
4594 | | - |
---|
4595 | | - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, |
---|
4596 | | - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, |
---|
4597 | | - &adev->gds.oa_gfx_bo, NULL, NULL); |
---|
4598 | | - if (r) |
---|
4599 | | - return r; |
---|
4600 | | - |
---|
4601 | 4533 | adev->gfx.ce_ram_size = 0x8000; |
---|
4602 | 4534 | |
---|
4603 | 4535 | gfx_v7_0_gpu_early_init(adev); |
---|
.. | .. |
---|
4607 | 4539 | |
---|
4608 | 4540 | static int gfx_v7_0_sw_fini(void *handle) |
---|
4609 | 4541 | { |
---|
4610 | | - int i; |
---|
4611 | 4542 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
---|
4612 | | - |
---|
4613 | | - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); |
---|
4614 | | - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); |
---|
4615 | | - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); |
---|
| 4543 | + int i; |
---|
4616 | 4544 | |
---|
4617 | 4545 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
---|
4618 | 4546 | amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); |
---|
.. | .. |
---|
4620 | 4548 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
---|
4621 | 4549 | |
---|
4622 | 4550 | gfx_v7_0_cp_compute_fini(adev); |
---|
4623 | | - gfx_v7_0_rlc_fini(adev); |
---|
| 4551 | + amdgpu_gfx_rlc_fini(adev); |
---|
4624 | 4552 | gfx_v7_0_mec_fini(adev); |
---|
4625 | 4553 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, |
---|
4626 | 4554 | &adev->gfx.rlc.clear_state_gpu_addr, |
---|
.. | .. |
---|
4640 | 4568 | int r; |
---|
4641 | 4569 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
---|
4642 | 4570 | |
---|
4643 | | - gfx_v7_0_gpu_init(adev); |
---|
| 4571 | + gfx_v7_0_constants_init(adev); |
---|
4644 | 4572 | |
---|
| 4573 | + /* init CSB */ |
---|
| 4574 | + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); |
---|
4645 | 4575 | /* init rlc */ |
---|
4646 | | - r = gfx_v7_0_rlc_resume(adev); |
---|
| 4576 | + r = adev->gfx.rlc.funcs->resume(adev); |
---|
4647 | 4577 | if (r) |
---|
4648 | 4578 | return r; |
---|
4649 | 4579 | |
---|
.. | .. |
---|
4661 | 4591 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); |
---|
4662 | 4592 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); |
---|
4663 | 4593 | gfx_v7_0_cp_enable(adev, false); |
---|
4664 | | - gfx_v7_0_rlc_stop(adev); |
---|
| 4594 | + adev->gfx.rlc.funcs->stop(adev); |
---|
4665 | 4595 | gfx_v7_0_fini_pg(adev); |
---|
4666 | 4596 | |
---|
4667 | 4597 | return 0; |
---|
.. | .. |
---|
4746 | 4676 | gfx_v7_0_update_cg(adev, false); |
---|
4747 | 4677 | |
---|
4748 | 4678 | /* stop the rlc */ |
---|
4749 | | - gfx_v7_0_rlc_stop(adev); |
---|
| 4679 | + adev->gfx.rlc.funcs->stop(adev); |
---|
4750 | 4680 | |
---|
4751 | 4681 | /* Disable GFX parsing/prefetching */ |
---|
4752 | 4682 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); |
---|
.. | .. |
---|
4915 | 4845 | enum amdgpu_interrupt_state state) |
---|
4916 | 4846 | { |
---|
4917 | 4847 | switch (type) { |
---|
4918 | | - case AMDGPU_CP_IRQ_GFX_EOP: |
---|
| 4848 | + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: |
---|
4919 | 4849 | gfx_v7_0_set_gfx_eop_interrupt_state(adev, state); |
---|
4920 | 4850 | break; |
---|
4921 | 4851 | case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: |
---|
.. | .. |
---|
4975 | 4905 | return 0; |
---|
4976 | 4906 | } |
---|
4977 | 4907 | |
---|
| 4908 | +static void gfx_v7_0_fault(struct amdgpu_device *adev, |
---|
| 4909 | + struct amdgpu_iv_entry *entry) |
---|
| 4910 | +{ |
---|
| 4911 | + struct amdgpu_ring *ring; |
---|
| 4912 | + u8 me_id, pipe_id; |
---|
| 4913 | + int i; |
---|
| 4914 | + |
---|
| 4915 | + me_id = (entry->ring_id & 0x0c) >> 2; |
---|
| 4916 | + pipe_id = (entry->ring_id & 0x03) >> 0; |
---|
| 4917 | + switch (me_id) { |
---|
| 4918 | + case 0: |
---|
| 4919 | + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); |
---|
| 4920 | + break; |
---|
| 4921 | + case 1: |
---|
| 4922 | + case 2: |
---|
| 4923 | + for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
---|
| 4924 | + ring = &adev->gfx.compute_ring[i]; |
---|
| 4925 | + if ((ring->me == me_id) && (ring->pipe == pipe_id)) |
---|
| 4926 | + drm_sched_fault(&ring->sched); |
---|
| 4927 | + } |
---|
| 4928 | + break; |
---|
| 4929 | + } |
---|
| 4930 | +} |
---|
| 4931 | + |
---|
4978 | 4932 | static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, |
---|
4979 | 4933 | struct amdgpu_irq_src *source, |
---|
4980 | 4934 | struct amdgpu_iv_entry *entry) |
---|
4981 | 4935 | { |
---|
4982 | 4936 | DRM_ERROR("Illegal register access in command stream\n"); |
---|
4983 | | - schedule_work(&adev->reset_work); |
---|
| 4937 | + gfx_v7_0_fault(adev, entry); |
---|
4984 | 4938 | return 0; |
---|
4985 | 4939 | } |
---|
4986 | 4940 | |
---|
.. | .. |
---|
4990 | 4944 | { |
---|
4991 | 4945 | DRM_ERROR("Illegal instruction in command stream\n"); |
---|
4992 | 4946 | // XXX soft reset the gfx block only |
---|
4993 | | - schedule_work(&adev->reset_work); |
---|
| 4947 | + gfx_v7_0_fault(adev, entry); |
---|
4994 | 4948 | return 0; |
---|
4995 | 4949 | } |
---|
4996 | 4950 | |
---|
.. | .. |
---|
5042 | 4996 | return 0; |
---|
5043 | 4997 | } |
---|
5044 | 4998 | |
---|
| 4999 | +static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring) |
---|
| 5000 | +{ |
---|
| 5001 | + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); |
---|
| 5002 | + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | |
---|
| 5003 | + PACKET3_TC_ACTION_ENA | |
---|
| 5004 | + PACKET3_SH_KCACHE_ACTION_ENA | |
---|
| 5005 | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ |
---|
| 5006 | + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ |
---|
| 5007 | + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ |
---|
| 5008 | + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ |
---|
| 5009 | +} |
---|
| 5010 | + |
---|
| 5011 | +static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) |
---|
| 5012 | +{ |
---|
| 5013 | + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); |
---|
| 5014 | + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | |
---|
| 5015 | + PACKET3_TC_ACTION_ENA | |
---|
| 5016 | + PACKET3_SH_KCACHE_ACTION_ENA | |
---|
| 5017 | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ |
---|
| 5018 | + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ |
---|
| 5019 | + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ |
---|
| 5020 | + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ |
---|
| 5021 | + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ |
---|
| 5022 | + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ |
---|
| 5023 | +} |
---|
| 5024 | + |
---|
5045 | 5025 | static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { |
---|
5046 | 5026 | .name = "gfx_v7_0", |
---|
5047 | 5027 | .early_init = gfx_v7_0_early_init, |
---|
.. | .. |
---|
5074 | 5054 | 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ |
---|
5075 | 5055 | 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ |
---|
5076 | 5056 | CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ |
---|
5077 | | - 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ |
---|
| 5057 | + 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ |
---|
| 5058 | + 5, /* SURFACE_SYNC */ |
---|
5078 | 5059 | .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ |
---|
5079 | 5060 | .emit_ib = gfx_v7_0_ring_emit_ib_gfx, |
---|
5080 | 5061 | .emit_fence = gfx_v7_0_ring_emit_fence_gfx, |
---|
.. | .. |
---|
5088 | 5069 | .pad_ib = amdgpu_ring_generic_pad_ib, |
---|
5089 | 5070 | .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, |
---|
5090 | 5071 | .emit_wreg = gfx_v7_0_ring_emit_wreg, |
---|
| 5072 | + .soft_recovery = gfx_v7_0_ring_soft_recovery, |
---|
| 5073 | + .emit_mem_sync = gfx_v7_0_emit_mem_sync, |
---|
5091 | 5074 | }; |
---|
5092 | 5075 | |
---|
5093 | 5076 | static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { |
---|
.. | .. |
---|
5104 | 5087 | 5 + /* hdp invalidate */ |
---|
5105 | 5088 | 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ |
---|
5106 | 5089 | CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ |
---|
5107 | | - 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ |
---|
5108 | | - .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ |
---|
| 5090 | + 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ |
---|
| 5091 | + 7, /* gfx_v7_0_emit_mem_sync_compute */ |
---|
| 5092 | + .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ |
---|
5109 | 5093 | .emit_ib = gfx_v7_0_ring_emit_ib_compute, |
---|
5110 | 5094 | .emit_fence = gfx_v7_0_ring_emit_fence_compute, |
---|
5111 | 5095 | .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, |
---|
.. | .. |
---|
5117 | 5101 | .insert_nop = amdgpu_ring_insert_nop, |
---|
5118 | 5102 | .pad_ib = amdgpu_ring_generic_pad_ib, |
---|
5119 | 5103 | .emit_wreg = gfx_v7_0_ring_emit_wreg, |
---|
| 5104 | + .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, |
---|
5120 | 5105 | }; |
---|
5121 | 5106 | |
---|
5122 | 5107 | static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) |
---|
.. | .. |
---|
5159 | 5144 | static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) |
---|
5160 | 5145 | { |
---|
5161 | 5146 | /* init asci gds info */ |
---|
5162 | | - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); |
---|
5163 | | - adev->gds.gws.total_size = 64; |
---|
5164 | | - adev->gds.oa.total_size = 16; |
---|
5165 | | - |
---|
5166 | | - if (adev->gds.mem.total_size == 64 * 1024) { |
---|
5167 | | - adev->gds.mem.gfx_partition_size = 4096; |
---|
5168 | | - adev->gds.mem.cs_partition_size = 4096; |
---|
5169 | | - |
---|
5170 | | - adev->gds.gws.gfx_partition_size = 4; |
---|
5171 | | - adev->gds.gws.cs_partition_size = 4; |
---|
5172 | | - |
---|
5173 | | - adev->gds.oa.gfx_partition_size = 4; |
---|
5174 | | - adev->gds.oa.cs_partition_size = 1; |
---|
5175 | | - } else { |
---|
5176 | | - adev->gds.mem.gfx_partition_size = 1024; |
---|
5177 | | - adev->gds.mem.cs_partition_size = 1024; |
---|
5178 | | - |
---|
5179 | | - adev->gds.gws.gfx_partition_size = 16; |
---|
5180 | | - adev->gds.gws.cs_partition_size = 16; |
---|
5181 | | - |
---|
5182 | | - adev->gds.oa.gfx_partition_size = 4; |
---|
5183 | | - adev->gds.oa.cs_partition_size = 4; |
---|
5184 | | - } |
---|
| 5147 | + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); |
---|
| 5148 | + adev->gds.gws_size = 64; |
---|
| 5149 | + adev->gds.oa_size = 16; |
---|
| 5150 | + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); |
---|
5185 | 5151 | } |
---|
5186 | 5152 | |
---|
5187 | 5153 | |
---|
.. | .. |
---|
5241 | 5207 | cu_info->lds_size = 64; |
---|
5242 | 5208 | } |
---|
5243 | 5209 | |
---|
5244 | | -const struct amdgpu_ip_block_version gfx_v7_0_ip_block = |
---|
| 5210 | +static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = |
---|
5245 | 5211 | { |
---|
5246 | 5212 | .type = AMD_IP_BLOCK_TYPE_GFX, |
---|
5247 | 5213 | .major = 7, |
---|