.. | ..
29 | 29 | * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30 | 30 | * Dave Airlie
31 | 31 | */
| 32 | +
| 33 | +#include <linux/dma-mapping.h>
| 34 | +#include <linux/iommu.h>
| 35 | +#include <linux/hmm.h>
| 36 | +#include <linux/pagemap.h>
| 37 | +#include <linux/sched/task.h>
| 38 | +#include <linux/sched/mm.h>
| 39 | +#include <linux/seq_file.h>
| 40 | +#include <linux/slab.h>
| 41 | +#include <linux/swap.h>
| 42 | +#include <linux/swiotlb.h>
| 43 | +#include <linux/dma-buf.h>
| 44 | +#include <linux/sizes.h>
| 45 | +
32 | 46 | #include <drm/ttm/ttm_bo_api.h>
33 | 47 | #include <drm/ttm/ttm_bo_driver.h>
34 | 48 | #include <drm/ttm/ttm_placement.h>
35 | 49 | #include <drm/ttm/ttm_module.h>
36 | 50 | #include <drm/ttm/ttm_page_alloc.h>
37 | | -#include <drm/drmP.h>
| 51 | +
| 52 | +#include <drm/drm_debugfs.h>
38 | 53 | #include <drm/amdgpu_drm.h>
39 | | -#include <linux/seq_file.h>
40 | | -#include <linux/slab.h>
41 | | -#include <linux/swiotlb.h>
42 | | -#include <linux/swap.h>
43 | | -#include <linux/pagemap.h>
44 | | -#include <linux/debugfs.h>
45 | | -#include <linux/iommu.h>
| 54 | +
46 | 55 | #include "amdgpu.h"
47 | 56 | #include "amdgpu_object.h"
48 | 57 | #include "amdgpu_trace.h"
49 | 58 | #include "amdgpu_amdkfd.h"
| 59 | +#include "amdgpu_sdma.h"
| 60 | +#include "amdgpu_ras.h"
| 61 | +#include "amdgpu_atomfirmware.h"
50 | 62 | #include "bif/bif_4_1_d.h"
51 | 63 |
52 | | -#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
| 64 | +#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
53 | 65 |
54 | | -static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
55 | | - struct ttm_mem_reg *mem, unsigned num_pages,
56 | | - uint64_t offset, unsigned window,
57 | | - struct amdgpu_ring *ring,
58 | | - uint64_t *addr);
| 66 | +static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
| 67 | + struct ttm_tt *ttm,
| 68 | + struct ttm_resource *bo_mem);
59 | 69 |
60 | | -static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
61 | | -static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
62 | | -
63 | | -/*
64 | | - * Global memory.
65 | | - */
66 | | -
67 | | -/**
68 | | - * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
69 | | - * memory object
70 | | - *
71 | | - * @ref: Object for initialization.
72 | | - *
73 | | - * This is called by drm_global_item_ref() when an object is being
74 | | - * initialized.
75 | | - */
76 | | -static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
| 70 | +static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
| 71 | + unsigned int type,
| 72 | + uint64_t size_in_page)
77 | 73 | {
78 | | - return ttm_mem_global_init(ref->object);
79 | | -}
80 | | -
81 | | -/**
82 | | - * amdgpu_ttm_mem_global_release - Drop reference to a memory object
83 | | - *
84 | | - * @ref: Object being removed
85 | | - *
86 | | - * This is called by drm_global_item_unref() when an object is being
87 | | - * released.
88 | | - */
89 | | -static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
90 | | -{
91 | | - ttm_mem_global_release(ref->object);
92 | | -}
93 | | -
94 | | -/**
95 | | - * amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
96 | | - *
97 | | - * @adev: AMDGPU device for which the global structures need to be registered.
98 | | - *
99 | | - * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
100 | | - * during bring up.
101 | | - */
102 | | -static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
103 | | -{
104 | | - struct drm_global_reference *global_ref;
105 | | - int r;
106 | | -
107 | | - /* ensure reference is false in case init fails */
108 | | - adev->mman.mem_global_referenced = false;
109 | | -
110 | | - global_ref = &adev->mman.mem_global_ref;
111 | | - global_ref->global_type = DRM_GLOBAL_TTM_MEM;
112 | | - global_ref->size = sizeof(struct ttm_mem_global);
113 | | - global_ref->init = &amdgpu_ttm_mem_global_init;
114 | | - global_ref->release = &amdgpu_ttm_mem_global_release;
115 | | - r = drm_global_item_ref(global_ref);
116 | | - if (r) {
117 | | - DRM_ERROR("Failed setting up TTM memory accounting "
118 | | - "subsystem.\n");
119 | | - goto error_mem;
120 | | - }
121 | | -
122 | | - adev->mman.bo_global_ref.mem_glob =
123 | | - adev->mman.mem_global_ref.object;
124 | | - global_ref = &adev->mman.bo_global_ref.ref;
125 | | - global_ref->global_type = DRM_GLOBAL_TTM_BO;
126 | | - global_ref->size = sizeof(struct ttm_bo_global);
127 | | - global_ref->init = &ttm_bo_global_init;
128 | | - global_ref->release = &ttm_bo_global_release;
129 | | - r = drm_global_item_ref(global_ref);
130 | | - if (r) {
131 | | - DRM_ERROR("Failed setting up TTM BO subsystem.\n");
132 | | - goto error_bo;
133 | | - }
134 | | -
135 | | - mutex_init(&adev->mman.gtt_window_lock);
136 | | -
137 | | - adev->mman.mem_global_referenced = true;
138 | | -
139 | | - return 0;
140 | | -
141 | | -error_bo:
142 | | - drm_global_item_unref(&adev->mman.mem_global_ref);
143 | | -error_mem:
144 | | - return r;
145 | | -}
146 | | -
147 | | -static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
148 | | -{
149 | | - if (adev->mman.mem_global_referenced) {
150 | | - mutex_destroy(&adev->mman.gtt_window_lock);
151 | | - drm_global_item_unref(&adev->mman.bo_global_ref.ref);
152 | | - drm_global_item_unref(&adev->mman.mem_global_ref);
153 | | - adev->mman.mem_global_referenced = false;
154 | | - }
155 | | -}
156 | | -
157 | | -static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
158 | | -{
159 | | - return 0;
160 | | -}
161 | | -
162 | | -/**
163 | | - * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
164 | | - * memory request.
165 | | - *
166 | | - * @bdev: The TTM BO device object (contains a reference to amdgpu_device)
167 | | - * @type: The type of memory requested
168 | | - * @man: The memory type manager for each domain
169 | | - *
170 | | - * This is called by ttm_bo_init_mm() when a buffer object is being
171 | | - * initialized.
172 | | - */
173 | | -static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
174 | | - struct ttm_mem_type_manager *man)
175 | | -{
176 | | - struct amdgpu_device *adev;
177 | | -
178 | | - adev = amdgpu_ttm_adev(bdev);
179 | | -
180 | | - switch (type) {
181 | | - case TTM_PL_SYSTEM:
182 | | - /* System memory */
183 | | - man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
184 | | - man->available_caching = TTM_PL_MASK_CACHING;
185 | | - man->default_caching = TTM_PL_FLAG_CACHED;
186 | | - break;
187 | | - case TTM_PL_TT:
188 | | - /* GTT memory */
189 | | - man->func = &amdgpu_gtt_mgr_func;
190 | | - man->gpu_offset = adev->gmc.gart_start;
191 | | - man->available_caching = TTM_PL_MASK_CACHING;
192 | | - man->default_caching = TTM_PL_FLAG_CACHED;
193 | | - man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
194 | | - break;
195 | | - case TTM_PL_VRAM:
196 | | - /* "On-card" video ram */
197 | | - man->func = &amdgpu_vram_mgr_func;
198 | | - man->gpu_offset = adev->gmc.vram_start;
199 | | - man->flags = TTM_MEMTYPE_FLAG_FIXED |
200 | | - TTM_MEMTYPE_FLAG_MAPPABLE;
201 | | - man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
202 | | - man->default_caching = TTM_PL_FLAG_WC;
203 | | - break;
204 | | - case AMDGPU_PL_GDS:
205 | | - case AMDGPU_PL_GWS:
206 | | - case AMDGPU_PL_OA:
207 | | - /* On-chip GDS memory*/
208 | | - man->func = &ttm_bo_manager_func;
209 | | - man->gpu_offset = 0;
210 | | - man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
211 | | - man->available_caching = TTM_PL_FLAG_UNCACHED;
212 | | - man->default_caching = TTM_PL_FLAG_UNCACHED;
213 | | - break;
214 | | - default:
215 | | - DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
216 | | - return -EINVAL;
217 | | - }
218 | | - return 0;
| 74 | + return ttm_range_man_init(&adev->mman.bdev, type,
| 75 | + false, size_in_page);
219 | 76 | }
220 | 77 |
221 | 78 | /**
.. | ..
234 | 91 | static const struct ttm_place placements = {
235 | 92 | .fpfn = 0,
236 | 93 | .lpfn = 0,
237 | | - .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
| 94 | + .mem_type = TTM_PL_SYSTEM,
| 95 | + .flags = TTM_PL_MASK_CACHING
238 | 96 | };
239 | 97 |
240 | 98 | /* Don't handle scatter gather BOs */
.. | ..
255 | 113 |
256 | 114 | abo = ttm_to_amdgpu_bo(bo);
257 | 115 | switch (bo->mem.mem_type) {
| 116 | + case AMDGPU_PL_GDS:
| 117 | + case AMDGPU_PL_GWS:
| 118 | + case AMDGPU_PL_OA:
| 119 | + placement->num_placement = 0;
| 120 | + placement->num_busy_placement = 0;
| 121 | + return;
| 122 | +
258 | 123 | case TTM_PL_VRAM:
259 | 124 | if (!adev->mman.buffer_funcs_enabled) {
260 | 125 | /* Move to system memory */
.. | ..
282 | 147 | case TTM_PL_TT:
283 | 148 | default:
284 | 149 | amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
| 150 | + break;
285 | 151 | }
286 | 152 | *placement = abo->placement;
287 | 153 | }
.. | ..
308 | 174 |
309 | 175 | if (amdgpu_ttm_tt_get_usermm(bo->ttm))
310 | 176 | return -EPERM;
311 | | - return drm_vma_node_verify_access(&abo->gem_base.vma_node,
| 177 | + return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
312 | 178 | filp->private_data);
313 | | -}
314 | | -
315 | | -/**
316 | | - * amdgpu_move_null - Register memory for a buffer object
317 | | - *
318 | | - * @bo: The bo to assign the memory to
319 | | - * @new_mem: The memory to be assigned.
320 | | - *
321 | | - * Assign the memory from new_mem to the memory of the buffer object bo.
322 | | - */
323 | | -static void amdgpu_move_null(struct ttm_buffer_object *bo,
324 | | - struct ttm_mem_reg *new_mem)
325 | | -{
326 | | - struct ttm_mem_reg *old_mem = &bo->mem;
327 | | -
328 | | - BUG_ON(old_mem->mm_node != NULL);
329 | | - *old_mem = *new_mem;
330 | | - new_mem->mm_node = NULL;
331 | 179 | }
332 | 180 |
333 | 181 | /**
.. | ..
340 | 188 | */
341 | 189 | static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
342 | 190 | struct drm_mm_node *mm_node,
343 | | - struct ttm_mem_reg *mem)
| 191 | + struct ttm_resource *mem)
344 | 192 | {
345 | 193 | uint64_t addr = 0;
346 | 194 |
347 | | - if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
| 195 | + if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
348 | 196 | addr = mm_node->start << PAGE_SHIFT;
349 | | - addr += bo->bdev->man[mem->mem_type].gpu_offset;
| 197 | + addr += amdgpu_ttm_domain_start(amdgpu_ttm_adev(bo->bdev),
| 198 | + mem->mem_type);
350 | 199 | }
351 | 200 | return addr;
352 | 201 | }
.. | ..
359 | 208 | * @offset: The offset that drm_mm_node is used for finding.
360 | 209 | *
361 | 210 | */
362 | | -static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
363 | | - unsigned long *offset)
| 211 | +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_resource *mem,
| 212 | + uint64_t *offset)
364 | 213 | {
365 | 214 | struct drm_mm_node *mm_node = mem->mm_node;
366 | 215 |
.. | ..
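The body of amdgpu_find_mm_node() is elided in this excerpt; it walks the drm_mm_node chain, consuming the offset until it lands inside one node and leaving the node-relative remainder in *offset. A minimal userspace model of that walk, assuming a plain array of nodes with sizes in pages (illustrative types, not the kernel API):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct node { uint64_t start, size; };	/* size in pages */

/* Advance until *offset falls inside a node; *offset becomes node-relative. */
static struct node *find_node(struct node *n, uint64_t *offset)
{
	while (*offset >= (n->size << PAGE_SHIFT)) {
		*offset -= n->size << PAGE_SHIFT;
		++n;
	}
	return n;
}

int main(void)
{
	struct node nodes[] = { { 0, 4 }, { 16, 8 } };	/* 16 KiB + 32 KiB */
	uint64_t off = 20 << 10;			/* 20 KiB into the BO */
	struct node *n = find_node(nodes, &off);

	printf("node start %llu, offset within node %llu\n",
	       (unsigned long long)n->start, (unsigned long long)off);
	return 0;
}

This also explains why the new prototype takes a uint64_t *offset: the caller gets back both the containing node and the residual offset in one pass.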
372 | 221 | }
373 | 222 |
374 | 223 | /**
| 224 | + * amdgpu_ttm_map_buffer - Map memory into the GART windows
| 225 | + * @bo: buffer object to map
| 226 | + * @mem: memory object to map
| 227 | + * @mm_node: drm_mm node object to map
| 228 | + * @num_pages: number of pages to map
| 229 | + * @offset: offset into @mm_node where to start
| 230 | + * @window: which GART window to use
| 231 | + * @ring: DMA ring to use for the copy
| 232 | + * @tmz: if we should setup a TMZ enabled mapping
| 233 | + * @addr: resulting address inside the MC address space
| 234 | + *
| 235 | + * Setup one of the GART windows to access a specific piece of memory or return
| 236 | + * the physical address for local memory.
| 237 | + */
| 238 | +static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
| 239 | + struct ttm_resource *mem,
| 240 | + struct drm_mm_node *mm_node,
| 241 | + unsigned num_pages, uint64_t offset,
| 242 | + unsigned window, struct amdgpu_ring *ring,
| 243 | + bool tmz, uint64_t *addr)
| 244 | +{
| 245 | + struct amdgpu_device *adev = ring->adev;
| 246 | + struct amdgpu_job *job;
| 247 | + unsigned num_dw, num_bytes;
| 248 | + struct dma_fence *fence;
| 249 | + uint64_t src_addr, dst_addr;
| 250 | + void *cpu_addr;
| 251 | + uint64_t flags;
| 252 | + unsigned int i;
| 253 | + int r;
| 254 | +
| 255 | + BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
| 256 | + AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
| 257 | +
| 258 | + /* Map only what can't be accessed directly */
| 259 | + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
| 260 | + *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
| 261 | + return 0;
| 262 | + }
| 263 | +
| 264 | + *addr = adev->gmc.gart_start;
| 265 | + *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
| 266 | + AMDGPU_GPU_PAGE_SIZE;
| 267 | + *addr += offset & ~PAGE_MASK;
| 268 | +
| 269 | + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
| 270 | + num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
| 271 | +
| 272 | + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
| 273 | + AMDGPU_IB_POOL_DELAYED, &job);
| 274 | + if (r)
| 275 | + return r;
| 276 | +
| 277 | + src_addr = num_dw * 4;
| 278 | + src_addr += job->ibs[0].gpu_addr;
| 279 | +
| 280 | + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
| 281 | + dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
| 282 | + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
| 283 | + dst_addr, num_bytes, false);
| 284 | +
| 285 | + amdgpu_ring_pad_ib(ring, &job->ibs[0]);
| 286 | + WARN_ON(job->ibs[0].length_dw > num_dw);
| 287 | +
| 288 | + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
| 289 | + if (tmz)
| 290 | + flags |= AMDGPU_PTE_TMZ;
| 291 | +
| 292 | + cpu_addr = &job->ibs[0].ptr[num_dw];
| 293 | +
| 294 | + if (mem->mem_type == TTM_PL_TT) {
| 295 | + struct ttm_dma_tt *dma;
| 296 | + dma_addr_t *dma_address;
| 297 | +
| 298 | + dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
| 299 | + dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
| 300 | + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
| 301 | + cpu_addr);
| 302 | + if (r)
| 303 | + goto error_free;
| 304 | + } else {
| 305 | + dma_addr_t dma_address;
| 306 | +
| 307 | + dma_address = (mm_node->start << PAGE_SHIFT) + offset;
| 308 | + dma_address += adev->vm_manager.vram_base_offset;
| 309 | +
| 310 | + for (i = 0; i < num_pages; ++i) {
| 311 | + r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
| 312 | + &dma_address, flags, cpu_addr);
| 313 | + if (r)
| 314 | + goto error_free;
| 315 | +
| 316 | + dma_address += PAGE_SIZE;
| 317 | + }
| 318 | + }
| 319 | +
| 320 | + r = amdgpu_job_submit(job, &adev->mman.entity,
| 321 | + AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
| 322 | + if (r)
| 323 | + goto error_free;
| 324 | +
| 325 | + dma_fence_put(fence);
| 326 | +
| 327 | + return r;
| 328 | +
| 329 | +error_free:
| 330 | + amdgpu_job_free(job);
| 331 | + return r;
| 332 | +}
| 333 | +
| 334 | +/**
375 | 335 | * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
| 336 | + * @adev: amdgpu device
| 337 | + * @src: buffer/address where to read from
| 338 | + * @dst: buffer/address where to write to
| 339 | + * @size: number of bytes to copy
| 340 | + * @tmz: if a secure copy should be used
| 341 | + * @resv: resv object to sync to
| 342 | + * @f: Returns the last fence if multiple jobs are submitted.
376 | 343 | *
377 | 344 | * The function copies @size bytes from {src->mem + src->offset} to
378 | 345 | * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
379 | 346 | * move and different for a BO to BO copy.
380 | 347 | *
381 | | - * @f: Returns the last fence if multiple jobs are submitted.
382 | 348 | */
383 | 349 | int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
384 | | - struct amdgpu_copy_mem *src,
385 | | - struct amdgpu_copy_mem *dst,
386 | | - uint64_t size,
387 | | - struct reservation_object *resv,
| 350 | + const struct amdgpu_copy_mem *src,
| 351 | + const struct amdgpu_copy_mem *dst,
| 352 | + uint64_t size, bool tmz,
| 353 | + struct dma_resv *resv,
388 | 354 | struct dma_fence **f)
389 | 355 | {
| 356 | + const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
| 357 | + AMDGPU_GPU_PAGE_SIZE);
| 358 | +
| 359 | + uint64_t src_node_size, dst_node_size, src_offset, dst_offset;
390 | 360 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
391 | 361 | struct drm_mm_node *src_mm, *dst_mm;
392 | | - uint64_t src_node_start, dst_node_start, src_node_size,
393 | | - dst_node_size, src_page_offset, dst_page_offset;
394 | 362 | struct dma_fence *fence = NULL;
395 | 363 | int r = 0;
396 | | - const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
397 | | - AMDGPU_GPU_PAGE_SIZE);
398 | 364 |
399 | 365 | if (!adev->mman.buffer_funcs_enabled) {
400 | 366 | DRM_ERROR("Trying to move memory with ring turned off.\n");
401 | 367 | return -EINVAL;
402 | 368 | }
403 | 369 |
404 | | - src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
405 | | - src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
406 | | - src->offset;
407 | | - src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
408 | | - src_page_offset = src_node_start & (PAGE_SIZE - 1);
| 370 | + src_offset = src->offset;
| 371 | + if (src->mem->mm_node) {
| 372 | + src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
| 373 | + src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
| 374 | + } else {
| 375 | + src_mm = NULL;
| 376 | + src_node_size = ULLONG_MAX;
| 377 | + }
409 | 378 |
410 | | - dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
411 | | - dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
412 | | - dst->offset;
413 | | - dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
414 | | - dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
| 379 | + dst_offset = dst->offset;
| 380 | + if (dst->mem->mm_node) {
| 381 | + dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
| 382 | + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
| 383 | + } else {
| 384 | + dst_mm = NULL;
| 385 | + dst_node_size = ULLONG_MAX;
| 386 | + }
415 | 387 |
416 | 388 | mutex_lock(&adev->mman.gtt_window_lock);
417 | 389 |
418 | 390 | while (size) {
419 | | - unsigned long cur_size;
420 | | - uint64_t from = src_node_start, to = dst_node_start;
| 391 | + uint32_t src_page_offset = src_offset & ~PAGE_MASK;
| 392 | + uint32_t dst_page_offset = dst_offset & ~PAGE_MASK;
421 | 393 | struct dma_fence *next;
| 394 | + uint32_t cur_size;
| 395 | + uint64_t from, to;
422 | 396 |
423 | 397 | /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
424 | 398 | * begins at an offset, then adjust the size accordingly
425 | 399 | */
426 | | - cur_size = min3(min(src_node_size, dst_node_size), size,
427 | | - GTT_MAX_BYTES);
428 | | - if (cur_size + src_page_offset > GTT_MAX_BYTES ||
429 | | - cur_size + dst_page_offset > GTT_MAX_BYTES)
430 | | - cur_size -= max(src_page_offset, dst_page_offset);
| 400 | + cur_size = max(src_page_offset, dst_page_offset);
| 401 | + cur_size = min(min3(src_node_size, dst_node_size, size),
| 402 | + (uint64_t)(GTT_MAX_BYTES - cur_size));
431 | 403 |
432 | | - /* Map only what needs to be accessed. Map src to window 0 and
433 | | - * dst to window 1
434 | | - */
435 | | - if (src->mem->mem_type == TTM_PL_TT &&
436 | | - !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
437 | | - r = amdgpu_map_buffer(src->bo, src->mem,
438 | | - PFN_UP(cur_size + src_page_offset),
439 | | - src_node_start, 0, ring,
440 | | - &from);
441 | | - if (r)
442 | | - goto error;
443 | | - /* Adjust the offset because amdgpu_map_buffer returns
444 | | - * start of mapped page
445 | | - */
446 | | - from += src_page_offset;
447 | | - }
| 404 | + /* Map src to window 0 and dst to window 1. */
| 405 | + r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm,
| 406 | + PFN_UP(cur_size + src_page_offset),
| 407 | + src_offset, 0, ring, tmz, &from);
| 408 | + if (r)
| 409 | + goto error;
448 | 410 |
449 | | - if (dst->mem->mem_type == TTM_PL_TT &&
450 | | - !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
451 | | - r = amdgpu_map_buffer(dst->bo, dst->mem,
452 | | - PFN_UP(cur_size + dst_page_offset),
453 | | - dst_node_start, 1, ring,
454 | | - &to);
455 | | - if (r)
456 | | - goto error;
457 | | - to += dst_page_offset;
458 | | - }
| 411 | + r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm,
| 412 | + PFN_UP(cur_size + dst_page_offset),
| 413 | + dst_offset, 1, ring, tmz, &to);
| 414 | + if (r)
| 415 | + goto error;
459 | 416 |
460 | 417 | r = amdgpu_copy_buffer(ring, from, to, cur_size,
461 | | - resv, &next, false, true);
| 418 | + resv, &next, false, true, tmz);
462 | 419 | if (r)
463 | 420 | goto error;
464 | 421 |
.. | ..
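The rewritten loop body computes the copy chunk in two steps: reserve room for the larger of the two page offsets, then clamp against the node sizes, the remaining size and the GART window. A standalone model of that arithmetic (the window size here is illustrative, not the driver's constant):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define GTT_MAX_BYTES (512 * PAGE_SIZE)	/* illustrative window size */

static uint64_t min2(uint64_t a, uint64_t b) { return a < b ? a : b; }

static uint64_t chunk(uint64_t src_off, uint64_t dst_off,
		      uint64_t src_node, uint64_t dst_node, uint64_t size)
{
	uint64_t src_pg = src_off & (PAGE_SIZE - 1);
	uint64_t dst_pg = dst_off & (PAGE_SIZE - 1);
	/* Reserve room for the worse page offset, then clamp. */
	uint64_t cur = src_pg > dst_pg ? src_pg : dst_pg;

	return min2(min2(src_node, dst_node),
		    min2(size, GTT_MAX_BYTES - cur));
}

int main(void)
{
	/* 3 MiB left to copy, 1 MiB left in each node, dst starts mid-page */
	printf("%llu\n", (unsigned long long)
	       chunk(0, 0x800, 1 << 20, 1 << 20, 3 << 20));
	return 0;
}

Folding the offset reservation into the clamp removes the old conditional subtraction, so the result can never exceed the window even when both source and destination start mid-page.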
471 | 428 |
472 | 429 | src_node_size -= cur_size;
473 | 430 | if (!src_node_size) {
474 | | - src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
475 | | - src->mem);
476 | | - src_node_size = (src_mm->size << PAGE_SHIFT);
| 431 | + ++src_mm;
| 432 | + src_node_size = src_mm->size << PAGE_SHIFT;
| 433 | + src_offset = 0;
477 | 434 | } else {
478 | | - src_node_start += cur_size;
479 | | - src_page_offset = src_node_start & (PAGE_SIZE - 1);
| 435 | + src_offset += cur_size;
480 | 436 | }
| 437 | +
481 | 438 | dst_node_size -= cur_size;
482 | 439 | if (!dst_node_size) {
483 | | - dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
484 | | - dst->mem);
485 | | - dst_node_size = (dst_mm->size << PAGE_SHIFT);
| 440 | + ++dst_mm;
| 441 | + dst_node_size = dst_mm->size << PAGE_SHIFT;
| 442 | + dst_offset = 0;
486 | 443 | } else {
487 | | - dst_node_start += cur_size;
488 | | - dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
| 444 | + dst_offset += cur_size;
489 | 445 | }
490 | 446 | }
491 | 447 | error:
.. | ..
503 | 459 | * help move buffers to and from VRAM.
504 | 460 | */
505 | 461 | static int amdgpu_move_blit(struct ttm_buffer_object *bo,
506 | | - bool evict, bool no_wait_gpu,
507 | | - struct ttm_mem_reg *new_mem,
508 | | - struct ttm_mem_reg *old_mem)
| 462 | + bool evict,
| 463 | + struct ttm_resource *new_mem,
| 464 | + struct ttm_resource *old_mem)
509 | 465 | {
510 | 466 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
| 467 | + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
511 | 468 | struct amdgpu_copy_mem src, dst;
512 | 469 | struct dma_fence *fence = NULL;
513 | 470 | int r;
.. | ..
521 | 478 |
522 | 479 | r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
523 | 480 | new_mem->num_pages << PAGE_SHIFT,
524 | | - bo->resv, &fence);
| 481 | + amdgpu_bo_encrypted(abo),
| 482 | + bo->base.resv, &fence);
525 | 483 | if (r)
526 | 484 | goto error;
527 | 485 |
528 | | - r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
| 486 | + /* clear the space being freed */
| 487 | + if (old_mem->mem_type == TTM_PL_VRAM &&
| 488 | + (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
| 489 | + struct dma_fence *wipe_fence = NULL;
| 490 | +
| 491 | + r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
| 492 | + NULL, &wipe_fence);
| 493 | + if (r) {
| 494 | + goto error;
| 495 | + } else if (wipe_fence) {
| 496 | + dma_fence_put(fence);
| 497 | + fence = wipe_fence;
| 498 | + }
| 499 | + }
| 500 | +
| 501 | + /* Always block for VM page tables before committing the new location */
| 502 | + if (bo->type == ttm_bo_type_kernel)
| 503 | + r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
| 504 | + else
| 505 | + r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
529 | 506 | dma_fence_put(fence);
530 | 507 | return r;
531 | 508 |
.. | ..
543 | 520 | */
544 | 521 | static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
545 | 522 | struct ttm_operation_ctx *ctx,
546 | | - struct ttm_mem_reg *new_mem)
| 523 | + struct ttm_resource *new_mem)
547 | 524 | {
548 | | - struct amdgpu_device *adev;
549 | | - struct ttm_mem_reg *old_mem = &bo->mem;
550 | | - struct ttm_mem_reg tmp_mem;
| 525 | + struct ttm_resource *old_mem = &bo->mem;
| 526 | + struct ttm_resource tmp_mem;
551 | 527 | struct ttm_place placements;
552 | 528 | struct ttm_placement placement;
553 | 529 | int r;
554 | | -
555 | | - adev = amdgpu_ttm_adev(bo->bdev);
556 | 530 |
557 | 531 | /* create space/pages for new_mem in GTT space */
558 | 532 | tmp_mem = *new_mem;
.. | ..
563 | 537 | placement.busy_placement = &placements;
564 | 538 | placements.fpfn = 0;
565 | 539 | placements.lpfn = 0;
566 | | - placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
| 540 | + placements.mem_type = TTM_PL_TT;
| 541 | + placements.flags = TTM_PL_MASK_CACHING;
567 | 542 | r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
568 | 543 | if (unlikely(r)) {
| 544 | + pr_err("Failed to find GTT space for blit from VRAM\n");
569 | 545 | return r;
570 | 546 | }
571 | 547 |
.. | ..
575 | 551 | goto out_cleanup;
576 | 552 | }
577 | 553 |
| 554 | + r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
| 555 | + if (unlikely(r))
| 556 | + goto out_cleanup;
| 557 | +
578 | 558 | /* Bind the memory to the GTT space */
579 | | - r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
| 559 | + r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, &tmp_mem);
580 | 560 | if (unlikely(r)) {
581 | 561 | goto out_cleanup;
582 | 562 | }
583 | 563 |
584 | 564 | /* blit VRAM to GTT */
585 | | - r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);
| 565 | + r = amdgpu_move_blit(bo, evict, &tmp_mem, old_mem);
586 | 566 | if (unlikely(r)) {
587 | 567 | goto out_cleanup;
588 | 568 | }
.. | ..
590 | 570 | /* move BO (in tmp_mem) to new_mem */
591 | 571 | r = ttm_bo_move_ttm(bo, ctx, new_mem);
592 | 572 | out_cleanup:
593 | | - ttm_bo_mem_put(bo, &tmp_mem);
| 573 | + ttm_resource_free(bo, &tmp_mem);
594 | 574 | return r;
595 | 575 | }
596 | 576 |
.. | ..
601 | 581 | */
602 | 582 | static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
603 | 583 | struct ttm_operation_ctx *ctx,
604 | | - struct ttm_mem_reg *new_mem)
| 584 | + struct ttm_resource *new_mem)
605 | 585 | {
606 | | - struct amdgpu_device *adev;
607 | | - struct ttm_mem_reg *old_mem = &bo->mem;
608 | | - struct ttm_mem_reg tmp_mem;
| 586 | + struct ttm_resource *old_mem = &bo->mem;
| 587 | + struct ttm_resource tmp_mem;
609 | 588 | struct ttm_placement placement;
610 | 589 | struct ttm_place placements;
611 | 590 | int r;
612 | | -
613 | | - adev = amdgpu_ttm_adev(bo->bdev);
614 | 591 |
615 | 592 | /* make space in GTT for old_mem buffer */
616 | 593 | tmp_mem = *new_mem;
.. | ..
621 | 598 | placement.busy_placement = &placements;
622 | 599 | placements.fpfn = 0;
623 | 600 | placements.lpfn = 0;
624 | | - placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
| 601 | + placements.mem_type = TTM_PL_TT;
| 602 | + placements.flags = TTM_PL_MASK_CACHING;
625 | 603 | r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
626 | 604 | if (unlikely(r)) {
| 605 | + pr_err("Failed to find GTT space for blit to VRAM\n");
627 | 606 | return r;
628 | 607 | }
629 | 608 |
.. | ..
634 | 613 | }
635 | 614 |
636 | 615 | /* copy to VRAM */
637 | | - r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);
| 616 | + r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
638 | 617 | if (unlikely(r)) {
639 | 618 | goto out_cleanup;
640 | 619 | }
641 | 620 | out_cleanup:
642 | | - ttm_bo_mem_put(bo, &tmp_mem);
| 621 | + ttm_resource_free(bo, &tmp_mem);
643 | 622 | return r;
| 623 | +}
| 624 | +
| 625 | +/**
| 626 | + * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
| 627 | + *
| 628 | + * Called by amdgpu_bo_move()
| 629 | + */
| 630 | +static bool amdgpu_mem_visible(struct amdgpu_device *adev,
| 631 | + struct ttm_resource *mem)
| 632 | +{
| 633 | + struct drm_mm_node *nodes = mem->mm_node;
| 634 | +
| 635 | + if (mem->mem_type == TTM_PL_SYSTEM ||
| 636 | + mem->mem_type == TTM_PL_TT)
| 637 | + return true;
| 638 | + if (mem->mem_type != TTM_PL_VRAM)
| 639 | + return false;
| 640 | +
| 641 | + /* ttm_resource_ioremap only supports contiguous memory */
| 642 | + if (nodes->size != mem->num_pages)
| 643 | + return false;
| 644 | +
| 645 | + return ((nodes->start + nodes->size) << PAGE_SHIFT)
| 646 | + <= adev->gmc.visible_vram_size;
644 | 647 | }
645 | 648 |
646 | 649 | /**
.. | ..
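The new amdgpu_mem_visible() helper reduces to two checks: the resource must be a single contiguous node, and that node must end inside CPU-visible VRAM. Modeled standalone below; the field names only loosely follow the kernel structs, and the 256 MiB BAR size is made up:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct node { uint64_t start, size; };	/* in pages */

static bool mem_visible(const struct node *n, uint64_t num_pages,
			uint64_t visible_vram_size)
{
	if (n->size != num_pages)	/* must be one contiguous node */
		return false;
	return ((n->start + n->size) << PAGE_SHIFT) <= visible_vram_size;
}

int main(void)
{
	struct node n = { .start = 0x100, .size = 64 };

	printf("visible: %d\n", mem_visible(&n, 64, 256ULL << 20));
	return 0;
}

System and GTT placements short-circuit to true in the driver because the CPU can always reach them; only VRAM needs the BAR-window test.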
650 | 653 | */
651 | 654 | static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
652 | 655 | struct ttm_operation_ctx *ctx,
653 | | - struct ttm_mem_reg *new_mem)
| 656 | + struct ttm_resource *new_mem)
654 | 657 | {
655 | 658 | struct amdgpu_device *adev;
656 | 659 | struct amdgpu_bo *abo;
657 | | - struct ttm_mem_reg *old_mem = &bo->mem;
| 660 | + struct ttm_resource *old_mem = &bo->mem;
658 | 661 | int r;
659 | 662 |
660 | 663 | /* Can't move a pinned BO */
.. | ..
665 | 668 | adev = amdgpu_ttm_adev(bo->bdev);
666 | 669 |
667 | 670 | if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
668 | | - amdgpu_move_null(bo, new_mem);
| 671 | + ttm_bo_move_null(bo, new_mem);
669 | 672 | return 0;
670 | 673 | }
671 | 674 | if ((old_mem->mem_type == TTM_PL_TT &&
.. | ..
673 | 676 | (old_mem->mem_type == TTM_PL_SYSTEM &&
674 | 677 | new_mem->mem_type == TTM_PL_TT)) {
675 | 678 | /* bind is enough */
676 | | - amdgpu_move_null(bo, new_mem);
| 679 | + ttm_bo_move_null(bo, new_mem);
| 680 | + return 0;
| 681 | + }
| 682 | + if (old_mem->mem_type == AMDGPU_PL_GDS ||
| 683 | + old_mem->mem_type == AMDGPU_PL_GWS ||
| 684 | + old_mem->mem_type == AMDGPU_PL_OA ||
| 685 | + new_mem->mem_type == AMDGPU_PL_GDS ||
| 686 | + new_mem->mem_type == AMDGPU_PL_GWS ||
| 687 | + new_mem->mem_type == AMDGPU_PL_OA) {
| 688 | + /* Nothing to save here */
| 689 | + ttm_bo_move_null(bo, new_mem);
677 | 690 | return 0;
678 | 691 | }
679 | 692 |
680 | | - if (!adev->mman.buffer_funcs_enabled)
| 693 | + if (!adev->mman.buffer_funcs_enabled) {
| 694 | + r = -ENODEV;
681 | 695 | goto memcpy;
| 696 | + }
682 | 697 |
683 | 698 | if (old_mem->mem_type == TTM_PL_VRAM &&
684 | 699 | new_mem->mem_type == TTM_PL_SYSTEM) {
.. | ..
687 | 702 | new_mem->mem_type == TTM_PL_VRAM) {
688 | 703 | r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
689 | 704 | } else {
690 | | - r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
| 705 | + r = amdgpu_move_blit(bo, evict,
691 | 706 | new_mem, old_mem);
692 | 707 | }
693 | 708 |
694 | 709 | if (r) {
695 | 710 | memcpy:
696 | | - r = ttm_bo_move_memcpy(bo, ctx, new_mem);
697 | | - if (r) {
| 711 | + /* Check that all memory is CPU accessible */
| 712 | + if (!amdgpu_mem_visible(adev, old_mem) ||
| 713 | + !amdgpu_mem_visible(adev, new_mem)) {
| 714 | + pr_err("Move buffer fallback to memcpy unavailable\n");
698 | 715 | return r;
699 | 716 | }
| 717 | +
| 718 | + r = ttm_bo_move_memcpy(bo, ctx, new_mem);
| 719 | + if (r)
| 720 | + return r;
700 | 721 | }
701 | 722 |
702 | 723 | if (bo->type == ttm_bo_type_device &&
.. | ..
718 | 739 | *
719 | 740 | * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
720 | 741 | */
721 | | -static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
| 742 | +static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *mem)
722 | 743 | {
723 | | - struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
724 | 744 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
725 | 745 | struct drm_mm_node *mm_node = mem->mm_node;
| 746 | + size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
726 | 747 |
727 | | - mem->bus.addr = NULL;
728 | | - mem->bus.offset = 0;
729 | | - mem->bus.size = mem->num_pages << PAGE_SHIFT;
730 | | - mem->bus.base = 0;
731 | | - mem->bus.is_iomem = false;
732 | | - if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
733 | | - return -EINVAL;
734 | 748 | switch (mem->mem_type) {
735 | 749 | case TTM_PL_SYSTEM:
736 | 750 | /* system memory */
.. | ..
740 | 754 | case TTM_PL_VRAM:
741 | 755 | mem->bus.offset = mem->start << PAGE_SHIFT;
742 | 756 | /* check if it's visible */
743 | | - if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
| 757 | + if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
744 | 758 | return -EINVAL;
745 | 759 | /* Only physically contiguous buffers apply. In a contiguous
746 | 760 | * buffer, size of the first mm_node would match the number of
747 | | - * pages in ttm_mem_reg.
| 761 | + * pages in ttm_resource.
748 | 762 | */
749 | 763 | if (adev->mman.aper_base_kaddr &&
750 | 764 | (mm_node->size == mem->num_pages))
751 | 765 | mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
752 | 766 | mem->bus.offset;
753 | 767 |
754 | | - mem->bus.base = adev->gmc.aper_base;
| 768 | + mem->bus.offset += adev->gmc.aper_base;
755 | 769 | mem->bus.is_iomem = true;
756 | 770 | break;
757 | 771 | default:
.. | ..
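Worth noting in this hunk: ttm_resource dropped the separate bus.base field, so the PCI aperture base is now folded straight into bus.offset instead of being added by callers. The before/after address math, sketched with a made-up BAR base:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t aper_base = 0xe0000000;	/* illustrative BAR base */
	uint64_t start_page = 0x42;		/* mem->start */

	/* old layout: base and offset kept apart, summed by the caller */
	uint64_t old_base = aper_base;
	uint64_t old_off = start_page << PAGE_SHIFT;

	/* new layout: one absolute bus offset */
	uint64_t new_off = (start_page << PAGE_SHIFT) + aper_base;

	printf("%#llx == %#llx\n",
	       (unsigned long long)(old_base + old_off),
	       (unsigned long long)new_off);
	return 0;
}

The same consolidation shows up in amdgpu_ttm_io_mem_pfn() below, which now adds adev->gmc.aper_base itself rather than reading bus.base.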
760 | 774 | return 0;
761 | 775 | }
762 | 776 |
763 | | -static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
764 | | -{
765 | | -}
766 | | -
767 | 777 | static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
768 | 778 | unsigned long page_offset)
769 | 779 | {
| 780 | + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
| 781 | + uint64_t offset = (page_offset << PAGE_SHIFT);
770 | 782 | struct drm_mm_node *mm;
771 | | - unsigned long offset = (page_offset << PAGE_SHIFT);
772 | 783 |
773 | 784 | mm = amdgpu_find_mm_node(&bo->mem, &offset);
774 | | - return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
775 | | - (offset >> PAGE_SHIFT);
| 785 | + offset += adev->gmc.aper_base;
| 786 | + return mm->start + (offset >> PAGE_SHIFT);
| 787 | +}
| 788 | +
| 789 | +/**
| 790 | + * amdgpu_ttm_domain_start - Returns GPU start address
| 791 | + * @adev: amdgpu device object
| 792 | + * @type: type of the memory
| 793 | + *
| 794 | + * Returns:
| 795 | + * GPU start address of a memory domain
| 796 | + */
| 797 | +
| 798 | +uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
| 799 | +{
| 800 | + switch (type) {
| 801 | + case TTM_PL_TT:
| 802 | + return adev->gmc.gart_start;
| 803 | + case TTM_PL_VRAM:
| 804 | + return adev->gmc.vram_start;
| 805 | + }
| 806 | +
| 807 | + return 0;
776 | 808 | }
777 | 809 |
778 | 810 | /*
779 | 811 | * TTM backend functions.
780 | 812 | */
781 | | -struct amdgpu_ttm_gup_task_list {
782 | | - struct list_head list;
783 | | - struct task_struct *task;
784 | | -};
785 | | -
786 | 813 | struct amdgpu_ttm_tt {
787 | 814 | struct ttm_dma_tt ttm;
| 815 | + struct drm_gem_object *gobj;
788 | 816 | u64 offset;
789 | 817 | uint64_t userptr;
790 | 818 | struct task_struct *usertask;
791 | 819 | uint32_t userflags;
792 | | - spinlock_t guptasklock;
793 | | - struct list_head guptasks;
794 | | - atomic_t mmu_invalidations;
795 | | - uint32_t last_set_pages;
| 820 | + bool bound;
| 821 | +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
| 822 | + struct hmm_range *range;
| 823 | +#endif
796 | 824 | };
797 | 825 |
| 826 | +#ifdef CONFIG_DRM_AMDGPU_USERPTR
798 | 827 | /**
799 | | - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
800 | | - * pointer to memory
| 828 | + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
| 829 | + * memory and start HMM tracking CPU page table update
801 | 830 | *
802 | | - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
803 | | - * This provides a wrapper around the get_user_pages() call to provide
804 | | - * device accessible pages that back user memory.
| 831 | + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
| 832 | + * once afterwards to stop HMM tracking
805 | 833 | */
806 | | -int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
| 834 | +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
807 | 835 | {
| 836 | + struct ttm_tt *ttm = bo->tbo.ttm;
808 | 837 | struct amdgpu_ttm_tt *gtt = (void *)ttm;
809 | | - struct mm_struct *mm = gtt->usertask->mm;
810 | | - unsigned int flags = 0;
811 | | - unsigned pinned = 0;
812 | | - int r;
| 838 | + unsigned long start = gtt->userptr;
| 839 | + struct vm_area_struct *vma;
| 840 | + struct hmm_range *range;
| 841 | + unsigned long timeout;
| 842 | + struct mm_struct *mm;
| 843 | + unsigned long i;
| 844 | + int r = 0;
813 | 845 |
814 | | - if (!mm) /* Happens during process shutdown */
815 | | - return -ESRCH;
816 | | -
817 | | - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
818 | | - flags |= FOLL_WRITE;
819 | | -
820 | | - down_read(&mm->mmap_sem);
821 | | -
822 | | - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
823 | | - /*
824 | | - * check that we only use anonymous memory to prevent problems
825 | | - * with writeback
826 | | - */
827 | | - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
828 | | - struct vm_area_struct *vma;
829 | | -
830 | | - vma = find_vma(mm, gtt->userptr);
831 | | - if (!vma || vma->vm_file || vma->vm_end < end) {
832 | | - up_read(&mm->mmap_sem);
833 | | - return -EPERM;
834 | | - }
| 846 | + mm = bo->notifier.mm;
| 847 | + if (unlikely(!mm)) {
| 848 | + DRM_DEBUG_DRIVER("BO is not registered?\n");
| 849 | + return -EFAULT;
835 | 850 | }
836 | 851 |
837 | | - /* loop enough times using contiguous pages of memory */
838 | | - do {
839 | | - unsigned num_pages = ttm->num_pages - pinned;
840 | | - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
841 | | - struct page **p = pages + pinned;
842 | | - struct amdgpu_ttm_gup_task_list guptask;
| 852 | + /* Another get_user_pages is running at the same time?? */
| 853 | + if (WARN_ON(gtt->range))
| 854 | + return -EFAULT;
843 | 855 |
844 | | - guptask.task = current;
845 | | - spin_lock(&gtt->guptasklock);
846 | | - list_add(&guptask.list, &gtt->guptasks);
847 | | - spin_unlock(&gtt->guptasklock);
| 856 | + if (!mmget_not_zero(mm)) /* Happens during process shutdown */
| 857 | + return -ESRCH;
848 | 858 |
849 | | - if (mm == current->mm)
850 | | - r = get_user_pages(userptr, num_pages, flags, p, NULL);
851 | | - else
852 | | - r = get_user_pages_remote(gtt->usertask,
853 | | - mm, userptr, num_pages,
854 | | - flags, p, NULL, NULL);
| 859 | + range = kzalloc(sizeof(*range), GFP_KERNEL);
| 860 | + if (unlikely(!range)) {
| 861 | + r = -ENOMEM;
| 862 | + goto out;
| 863 | + }
| 864 | + range->notifier = &bo->notifier;
| 865 | + range->start = bo->notifier.interval_tree.start;
| 866 | + range->end = bo->notifier.interval_tree.last + 1;
| 867 | + range->default_flags = HMM_PFN_REQ_FAULT;
| 868 | + if (!amdgpu_ttm_tt_is_readonly(ttm))
| 869 | + range->default_flags |= HMM_PFN_REQ_WRITE;
855 | 870 |
856 | | - spin_lock(&gtt->guptasklock);
857 | | - list_del(&guptask.list);
858 | | - spin_unlock(&gtt->guptasklock);
| 871 | + range->hmm_pfns = kvmalloc_array(ttm->num_pages,
| 872 | + sizeof(*range->hmm_pfns), GFP_KERNEL);
| 873 | + if (unlikely(!range->hmm_pfns)) {
| 874 | + r = -ENOMEM;
| 875 | + goto out_free_ranges;
| 876 | + }
859 | 877 |
860 | | - if (r < 0)
861 | | - goto release_pages;
| 878 | + mmap_read_lock(mm);
| 879 | + vma = find_vma(mm, start);
| 880 | + if (unlikely(!vma || start < vma->vm_start)) {
| 881 | + r = -EFAULT;
| 882 | + goto out_unlock;
| 883 | + }
| 884 | + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
| 885 | + vma->vm_file)) {
| 886 | + r = -EPERM;
| 887 | + goto out_unlock;
| 888 | + }
| 889 | + mmap_read_unlock(mm);
| 890 | + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
862 | 891 |
863 | | - pinned += r;
| 892 | +retry:
| 893 | + range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
864 | 894 |
865 | | - } while (pinned < ttm->num_pages);
| 895 | + mmap_read_lock(mm);
| 896 | + r = hmm_range_fault(range);
| 897 | + mmap_read_unlock(mm);
| 898 | + if (unlikely(r)) {
| 899 | + /*
| 900 | + * FIXME: This timeout should encompass the retry from
| 901 | + * mmu_interval_read_retry() as well.
| 902 | + */
| 903 | + if (r == -EBUSY && !time_after(jiffies, timeout))
| 904 | + goto retry;
| 905 | + goto out_free_pfns;
| 906 | + }
866 | 907 |
867 | | - up_read(&mm->mmap_sem);
| 908 | + /*
| 909 | + * Due to default_flags, all pages are HMM_PFN_VALID or
| 910 | + * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
| 911 | + * the notifier_lock, and mmu_interval_read_retry() must be done first.
| 912 | + */
| 913 | + for (i = 0; i < ttm->num_pages; i++)
| 914 | + pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
| 915 | +
| 916 | + gtt->range = range;
| 917 | + mmput(mm);
| 918 | +
868 | 919 | return 0;
869 | 920 |
870 | | -release_pages:
871 | | - release_pages(pages, pinned);
872 | | - up_read(&mm->mmap_sem);
| 921 | +out_unlock:
| 922 | + mmap_read_unlock(mm);
| 923 | +out_free_pfns:
| 924 | + kvfree(range->hmm_pfns);
| 925 | +out_free_ranges:
| 926 | + kfree(range);
| 927 | +out:
| 928 | + mmput(mm);
873 | 929 | return r;
874 | 930 | }
| 931 | +
| 932 | +/**
| 933 | + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
| 934 | + * Check if the pages backing this ttm range have been invalidated
| 935 | + *
| 936 | + * Returns: true if pages are still valid
| 937 | + */
| 938 | +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
| 939 | +{
| 940 | + struct amdgpu_ttm_tt *gtt = (void *)ttm;
| 941 | + bool r = false;
| 942 | +
| 943 | + if (!gtt || !gtt->userptr)
| 944 | + return false;
| 945 | +
| 946 | + DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
| 947 | + gtt->userptr, ttm->num_pages);
| 948 | +
| 949 | + WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
| 950 | + "No user pages to check\n");
| 951 | +
| 952 | + if (gtt->range) {
| 953 | + /*
| 954 | + * FIXME: Must always hold notifier_lock for this, and must
| 955 | + * not ignore the return code.
| 956 | + */
| 957 | + r = mmu_interval_read_retry(gtt->range->notifier,
| 958 | + gtt->range->notifier_seq);
| 959 | + kvfree(gtt->range->hmm_pfns);
| 960 | + kfree(gtt->range);
| 961 | + gtt->range = NULL;
| 962 | + }
| 963 | +
| 964 | + return !r;
| 965 | +}
| 966 | +#endif
875 | 967 |
876 | 968 | /**
877 | 969 | * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
.. | ..
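The HMM conversion above replaces pinned get_user_pages() with a sequence-count protocol: sample the notifier sequence, fault the range, and if the mapping went stale retry until a timeout expires. The shape of that loop, modeled in userspace with stand-in functions (seq_begin() and fault_range() are placeholders, not kernel APIs):

#include <stdio.h>
#include <time.h>

#define EBUSY 16

/* Stand-ins for mmu_interval_read_begin() / hmm_range_fault(). */
static unsigned long seq_begin(void) { static unsigned long s; return ++s; }
static int fault_range(unsigned long seq) { return seq < 3 ? -EBUSY : 0; }

int main(void)
{
	time_t timeout = time(NULL) + 1;	/* models HMM_RANGE_DEFAULT_TIMEOUT */
	int r;

retry:
	r = fault_range(seq_begin());
	if (r == -EBUSY && time(NULL) <= timeout)
		goto retry;	/* the range was invalidated underneath us */

	printf("fault result: %d\n", r);
	return r ? 1 : 0;
}

The payoff is that user pages are no longer pinned at all: amdgpu_ttm_tt_get_user_pages_done() later calls mmu_interval_read_retry() to detect whether the CPU page tables changed while the driver held the pages.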
882 | 974 | */
883 | 975 | void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
884 | 976 | {
885 | | - struct amdgpu_ttm_tt *gtt = (void *)ttm;
886 | | - unsigned i;
| 977 | + unsigned long i;
887 | 978 |
888 | | - gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
889 | | - for (i = 0; i < ttm->num_pages; ++i) {
890 | | - if (ttm->pages[i])
891 | | - put_page(ttm->pages[i]);
892 | | -
| 979 | + for (i = 0; i < ttm->num_pages; ++i)
893 | 980 | ttm->pages[i] = pages ? pages[i] : NULL;
894 | | - }
895 | | -}
896 | | -
897 | | -/**
898 | | - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
899 | | - *
900 | | - * Called while unpinning userptr pages
901 | | - */
902 | | -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
903 | | -{
904 | | - struct amdgpu_ttm_tt *gtt = (void *)ttm;
905 | | - unsigned i;
906 | | -
907 | | - for (i = 0; i < ttm->num_pages; ++i) {
908 | | - struct page *page = ttm->pages[i];
909 | | -
910 | | - if (!page)
911 | | - continue;
912 | | -
913 | | - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
914 | | - set_page_dirty(page);
915 | | -
916 | | - mark_page_accessed(page);
917 | | - }
918 | 981 | }
919 | 982 |
920 | 983 | /**
.. | ..
922 | 985 | *
923 | 986 | * Called by amdgpu_ttm_backend_bind()
924 | 987 | **/
925 | | -static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
| 988 | +static int amdgpu_ttm_tt_pin_userptr(struct ttm_bo_device *bdev,
| 989 | + struct ttm_tt *ttm)
926 | 990 | {
927 | | - struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
| 991 | + struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
928 | 992 | struct amdgpu_ttm_tt *gtt = (void *)ttm;
929 | | - unsigned nents;
930 | 993 | int r;
931 | 994 |
932 | 995 | int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
.. | ..
941 | 1004 | goto release_sg;
942 | 1005 |
943 | 1006 | /* Map SG to device */
944 | | - r = -ENOMEM;
945 | | - nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
946 | | - if (nents != ttm->sg->nents)
| 1007 | + r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
| 1008 | + if (r)
947 | 1009 | goto release_sg;
948 | 1010 |
949 | 1011 | /* convert SG to linear array of pages and dma addresses */
.. | ..
961 | 1023 | /**
962 | 1024 | * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
963 | 1025 | */
964 | | -static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
| 1026 | +static void amdgpu_ttm_tt_unpin_userptr(struct ttm_bo_device *bdev,
| 1027 | + struct ttm_tt *ttm)
965 | 1028 | {
966 | | - struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
| 1029 | + struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
967 | 1030 | struct amdgpu_ttm_tt *gtt = (void *)ttm;
968 | 1031 |
969 | 1032 | int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
.. | ..
975 | 1038 | return;
976 | 1039 |
977 | 1040 | /* unmap the pages mapped to the device */
978 | | - dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
979 | | -
980 | | - /* mark the pages as dirty */
981 | | - amdgpu_ttm_tt_mark_user_pages(ttm);
982 | | -
| 1041 | + dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
983 | 1042 | sg_free_table(ttm->sg);
| 1043 | +
| 1044 | +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
| 1045 | + if (gtt->range) {
| 1046 | + unsigned long i;
| 1047 | +
| 1048 | + for (i = 0; i < ttm->num_pages; i++) {
| 1049 | + if (ttm->pages[i] !=
| 1050 | + hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
| 1051 | + break;
| 1052 | + }
| 1053 | +
| 1054 | + WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
| 1055 | + }
| 1056 | +#endif
984 | 1057 | }
985 | 1058 |
986 | | -int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
| 1059 | +static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
987 | 1060 | struct ttm_buffer_object *tbo,
988 | 1061 | uint64_t flags)
989 | 1062 | {
.. | ..
992 | 1065 | struct amdgpu_ttm_tt *gtt = (void *)ttm;
993 | 1066 | int r;
994 | 1067 |
995 | | - if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
| 1068 | + if (amdgpu_bo_encrypted(abo))
| 1069 | + flags |= AMDGPU_PTE_TMZ;
| 1070 | +
| 1071 | + if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
996 | 1072 | uint64_t page_idx = 1;
997 | 1073 |
998 | 1074 | r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
.. | ..
1000 | 1076 | if (r)
1001 | 1077 | goto gart_bind_fail;
1002 | 1078 |
1003 | | - /* Patch mtype of the second part BO */
1004 | | - flags &= ~AMDGPU_PTE_MTYPE_MASK;
1005 | | - flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
| 1079 | + /* The memory type of the first page defaults to UC. Now
| 1080 | + * modify the memory type to NC from the second page of
| 1081 | + * the BO onward.
| 1082 | + */
| 1083 | + flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
| 1084 | + flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
1006 | 1085 |
1007 | 1086 | r = amdgpu_gart_bind(adev,
1008 | 1087 | gtt->offset + (page_idx << PAGE_SHIFT),
.. | ..
1028 | 1107 | * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
1029 | 1108 | * This handles binding GTT memory to the device address space.
1030 | 1109 | */
1031 | | -static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
1032 | | - struct ttm_mem_reg *bo_mem)
| 1110 | +static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
| 1111 | + struct ttm_tt *ttm,
| 1112 | + struct ttm_resource *bo_mem)
1033 | 1113 | {
1034 | | - struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
| 1114 | + struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
1035 | 1115 | struct amdgpu_ttm_tt *gtt = (void*)ttm;
1036 | 1116 | uint64_t flags;
1037 | 1117 | int r = 0;
1038 | 1118 |
| 1119 | + if (!bo_mem)
| 1120 | + return -EINVAL;
| 1121 | +
| 1122 | + if (gtt->bound)
| 1123 | + return 0;
| 1124 | +
1039 | 1125 | if (gtt->userptr) {
1040 | | - r = amdgpu_ttm_tt_pin_userptr(ttm);
| 1126 | + r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
1041 | 1127 | if (r) {
1042 | 1128 | DRM_ERROR("failed to pin userptr\n");
1043 | 1129 | return r;
.. | ..
---|
1069 | 1155 | if (r) |
---|
1070 | 1156 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", |
---|
1071 | 1157 | ttm->num_pages, gtt->offset); |
---|
| 1158 | + gtt->bound = true; |
---|
1072 | 1159 | return r; |
---|
1073 | 1160 | } |
---|
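
The new gtt->bound flag, checked at the top of bind and cleared again in unbind below, makes the pair idempotent now that TTM can reach them through more than one path. The pattern in isolation, with hypothetical names:

    /* Hedged sketch of the idempotent bind/unbind pattern. */
    struct example_tt {
        bool bound;
    };

    static int example_bind(struct example_tt *tt)
    {
        if (tt->bound)
            return 0;       /* a second bind is a no-op */
        /* ... program GART entries ... */
        tt->bound = true;
        return 0;
    }

    static void example_unbind(struct example_tt *tt)
    {
        if (!tt->bound)
            return;         /* never bound, or already undone */
        /* ... clear GART entries ... */
        tt->bound = false;
    }
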
1074 | 1161 | |
---|
1075 | 1162 | /** |
---|
1076 | | - * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object |
---|
| 1163 | + * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either |
---|
| 1164 | + * through AGP or GART aperture. |
---|
| 1165 | + * |
---|
| 1166 | + * If bo is accessible through AGP aperture, then use AGP aperture |
---|
| 1167 | + * to access bo; otherwise allocate logical space in GART aperture |
---|
| 1168 | + * and map bo to GART aperture. |
---|
1077 | 1169 | */ |
---|
1078 | 1170 | int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) |
---|
1079 | 1171 | { |
---|
1080 | 1172 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); |
---|
1081 | 1173 | struct ttm_operation_ctx ctx = { false, false }; |
---|
1082 | 1174 | struct amdgpu_ttm_tt *gtt = (void*)bo->ttm; |
---|
1083 | | - struct ttm_mem_reg tmp; |
---|
| 1175 | + struct ttm_resource tmp; |
---|
1084 | 1176 | struct ttm_placement placement; |
---|
1085 | 1177 | struct ttm_place placements; |
---|
1086 | | - uint64_t flags; |
---|
| 1178 | + uint64_t addr, flags; |
---|
1087 | 1179 | int r; |
---|
1088 | 1180 | |
---|
1089 | | - if (bo->mem.mem_type != TTM_PL_TT || |
---|
1090 | | - amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) |
---|
| 1181 | + if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) |
---|
1091 | 1182 | return 0; |
---|
1092 | 1183 | |
---|
1093 | | - /* allocate GTT space */ |
---|
1094 | | - tmp = bo->mem; |
---|
1095 | | - tmp.mm_node = NULL; |
---|
1096 | | - placement.num_placement = 1; |
---|
1097 | | - placement.placement = &placements; |
---|
1098 | | - placement.num_busy_placement = 1; |
---|
1099 | | - placement.busy_placement = &placements; |
---|
1100 | | - placements.fpfn = 0; |
---|
1101 | | - placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; |
---|
1102 | | - placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | |
---|
1103 | | - TTM_PL_FLAG_TT; |
---|
| 1184 | + addr = amdgpu_gmc_agp_addr(bo); |
---|
| 1185 | + if (addr != AMDGPU_BO_INVALID_OFFSET) { |
---|
| 1186 | + bo->mem.start = addr >> PAGE_SHIFT; |
---|
| 1187 | + } else { |
---|
1104 | 1188 | |
---|
1105 | | - r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); |
---|
1106 | | - if (unlikely(r)) |
---|
1107 | | - return r; |
---|
| 1189 | + /* allocate GART space */ |
---|
| 1190 | + tmp = bo->mem; |
---|
| 1191 | + tmp.mm_node = NULL; |
---|
| 1192 | + placement.num_placement = 1; |
---|
| 1193 | + placement.placement = &placements; |
---|
| 1194 | + placement.num_busy_placement = 1; |
---|
| 1195 | + placement.busy_placement = &placements; |
---|
| 1196 | + placements.fpfn = 0; |
---|
| 1197 | + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; |
---|
| 1198 | + placements.mem_type = TTM_PL_TT; |
---|
| 1199 | + placements.flags = bo->mem.placement; |
---|
1108 | 1200 | |
---|
1109 | | - /* compute PTE flags for this buffer object */ |
---|
1110 | | - flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); |
---|
| 1201 | + r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); |
---|
| 1202 | + if (unlikely(r)) |
---|
| 1203 | + return r; |
---|
1111 | 1204 | |
---|
1112 | | - /* Bind pages */ |
---|
1113 | | - gtt->offset = (u64)tmp.start << PAGE_SHIFT; |
---|
1114 | | - r = amdgpu_ttm_gart_bind(adev, bo, flags); |
---|
1115 | | - if (unlikely(r)) { |
---|
1116 | | - ttm_bo_mem_put(bo, &tmp); |
---|
1117 | | - return r; |
---|
| 1205 | + /* compute PTE flags for this buffer object */ |
---|
| 1206 | + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); |
---|
| 1207 | + |
---|
| 1208 | + /* Bind pages */ |
---|
| 1209 | + gtt->offset = (u64)tmp.start << PAGE_SHIFT; |
---|
| 1210 | + r = amdgpu_ttm_gart_bind(adev, bo, flags); |
---|
| 1211 | + if (unlikely(r)) { |
---|
| 1212 | + ttm_resource_free(bo, &tmp); |
---|
| 1213 | + return r; |
---|
| 1214 | + } |
---|
| 1215 | + |
---|
| 1216 | + ttm_resource_free(bo, &bo->mem); |
---|
| 1217 | + bo->mem = tmp; |
---|
1118 | 1218 | } |
---|
1119 | | - |
---|
1120 | | - ttm_bo_mem_put(bo, &bo->mem); |
---|
1121 | | - bo->mem = tmp; |
---|
1122 | | - bo->offset = (bo->mem.start << PAGE_SHIFT) + |
---|
1123 | | - bo->bdev->man[bo->mem.mem_type].gpu_offset; |
---|
1124 | 1219 | |
---|
1125 | 1220 | return 0; |
---|
1126 | 1221 | } |
---|
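
After this rework, callers of amdgpu_ttm_alloc_gart() rely only on the invariant that bo->mem.start is valid on return, whether it came for free from the AGP aperture or from a fresh GART allocation. A hedged caller-side sketch (function name hypothetical):

    /* Hedged sketch: a caller that needs the BO reachable by the GPU
     * before queueing work against it. */
    static int example_prepare_bo(struct ttm_buffer_object *bo)
    {
        int r;

        r = amdgpu_ttm_alloc_gart(bo);
        if (r)
            return r;   /* GART space exhausted or eviction failed */

        /* bo->mem.start is now valid: either an AGP page offset or a
         * freshly bound GART page offset. */
        return 0;
    }
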
.. | .. |
---|
1152 | 1247 | * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and |
---|
1153 | 1248 | * ttm_tt_destroy(). |
---|
1154 | 1249 | */ |
---|
1155 | | -static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) |
---|
| 1250 | +static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev, |
---|
| 1251 | + struct ttm_tt *ttm) |
---|
1156 | 1252 | { |
---|
1157 | | - struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
---|
| 1253 | + struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
---|
1158 | 1254 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1159 | 1255 | int r; |
---|
1160 | 1256 | |
---|
1161 | 1257 | /* if the pages have userptr pinning then clear that first */ |
---|
1162 | 1258 | if (gtt->userptr) |
---|
1163 | | - amdgpu_ttm_tt_unpin_userptr(ttm); |
---|
| 1259 | + amdgpu_ttm_tt_unpin_userptr(bdev, ttm); |
---|
| 1260 | + |
---|
| 1261 | + if (!gtt->bound) |
---|
| 1262 | + return; |
---|
1164 | 1263 | |
---|
1165 | 1264 | if (gtt->offset == AMDGPU_BO_INVALID_OFFSET) |
---|
1166 | | - return 0; |
---|
| 1265 | + return; |
---|
1167 | 1266 | |
---|
1168 | 1267 | /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ |
---|
1169 | 1268 | r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); |
---|
1170 | 1269 | if (r) |
---|
1171 | 1270 | DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", |
---|
1172 | 1271 | gtt->ttm.ttm.num_pages, gtt->offset); |
---|
1173 | | - return r; |
---|
| 1272 | + gtt->bound = false; |
---|
1174 | 1273 | } |
---|
1175 | 1274 | |
---|
1176 | | -static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) |
---|
| 1275 | +static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev, |
---|
| 1276 | + struct ttm_tt *ttm) |
---|
1177 | 1277 | { |
---|
1178 | 1278 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1179 | 1279 | |
---|
| 1280 | + amdgpu_ttm_backend_unbind(bdev, ttm); |
---|
| 1281 | + ttm_tt_destroy_common(bdev, ttm); |
---|
1180 | 1282 | if (gtt->usertask) |
---|
1181 | 1283 | put_task_struct(gtt->usertask); |
---|
1182 | 1284 | |
---|
1183 | 1285 | ttm_dma_tt_fini(&gtt->ttm); |
---|
1184 | 1286 | kfree(gtt); |
---|
1185 | 1287 | } |
---|
1186 | | - |
---|
1187 | | -static struct ttm_backend_func amdgpu_backend_func = { |
---|
1188 | | - .bind = &amdgpu_ttm_backend_bind, |
---|
1189 | | - .unbind = &amdgpu_ttm_backend_unbind, |
---|
1190 | | - .destroy = &amdgpu_ttm_backend_destroy, |
---|
1191 | | -}; |
---|
1192 | 1288 | |
---|
1193 | 1289 | /** |
---|
1194 | 1290 | * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO |
---|
.. | .. |
---|
1200 | 1296 | static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, |
---|
1201 | 1297 | uint32_t page_flags) |
---|
1202 | 1298 | { |
---|
1203 | | - struct amdgpu_device *adev; |
---|
1204 | 1299 | struct amdgpu_ttm_tt *gtt; |
---|
1205 | | - |
---|
1206 | | - adev = amdgpu_ttm_adev(bo->bdev); |
---|
1207 | 1300 | |
---|
1208 | 1301 | gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); |
---|
1209 | 1302 | if (gtt == NULL) { |
---|
1210 | 1303 | return NULL; |
---|
1211 | 1304 | } |
---|
1212 | | - gtt->ttm.ttm.func = &amdgpu_backend_func; |
---|
| 1305 | + gtt->gobj = &bo->base; |
---|
1213 | 1306 | |
---|
1214 | 1307 | /* allocate space for the uninitialized page entries */ |
---|
1215 | 1308 | if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) { |
---|
.. | .. |
---|
1225 | 1318 | * Map the pages of a ttm_tt object to an address space visible |
---|
1226 | 1319 | * to the underlying device. |
---|
1227 | 1320 | */ |
---|
1228 | | -static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, |
---|
1229 | | - struct ttm_operation_ctx *ctx) |
---|
| 1321 | +static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev, |
---|
| 1322 | + struct ttm_tt *ttm, |
---|
| 1323 | + struct ttm_operation_ctx *ctx) |
---|
1230 | 1324 | { |
---|
1231 | | - struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
---|
| 1325 | + struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
---|
1232 | 1326 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1233 | | - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); |
---|
1234 | 1327 | |
---|
1235 | 1328 | /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ |
---|
1236 | 1329 | if (gtt && gtt->userptr) { |
---|
.. | .. |
---|
1239 | 1332 | return -ENOMEM; |
---|
1240 | 1333 | |
---|
1241 | 1334 | ttm->page_flags |= TTM_PAGE_FLAG_SG; |
---|
1242 | | - ttm->state = tt_unbound; |
---|
| 1335 | + ttm_tt_set_populated(ttm); |
---|
1243 | 1336 | return 0; |
---|
1244 | 1337 | } |
---|
1245 | 1338 | |
---|
1246 | | - if (slave && ttm->sg) { |
---|
| 1339 | + if (ttm->page_flags & TTM_PAGE_FLAG_SG) { |
---|
| 1340 | + if (!ttm->sg) { |
---|
| 1341 | + struct dma_buf_attachment *attach; |
---|
| 1342 | + struct sg_table *sgt; |
---|
| 1343 | + |
---|
| 1344 | + attach = gtt->gobj->import_attach; |
---|
| 1345 | + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); |
---|
| 1346 | + if (IS_ERR(sgt)) |
---|
| 1347 | + return PTR_ERR(sgt); |
---|
| 1348 | + |
---|
| 1349 | + ttm->sg = sgt; |
---|
| 1350 | + } |
---|
| 1351 | + |
---|
1247 | 1352 | drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, |
---|
1248 | 1353 | gtt->ttm.dma_address, |
---|
1249 | 1354 | ttm->num_pages); |
---|
1250 | | - ttm->state = tt_unbound; |
---|
| 1355 | + ttm_tt_set_populated(ttm); |
---|
1251 | 1356 | return 0; |
---|
1252 | 1357 | } |
---|
1253 | 1358 | |
---|
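
For BOs imported from a dma-buf, ttm->sg is created lazily here by mapping the attachment (and released again in unpopulate further down). The importer-side pairing in isolation, as a minimal sketch with error paths trimmed:

    #include <linux/dma-buf.h>

    /* Hedged sketch of the dma-buf importer mapping pattern used above.
     * dma_buf_map_attachment() returns an ERR_PTR() on failure. */
    static int example_import_pages(struct dma_buf_attachment *attach)
    {
        struct sg_table *sgt;

        sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
        if (IS_ERR(sgt))
            return PTR_ERR(sgt);

        /* ... walk the table with for_each_sgtable_dma_sg() ... */

        dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
        return 0;
    }
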
.. | .. |
---|
1268 | 1373 | * Unmaps pages of a ttm_tt object from the device address space and |
---|
1269 | 1374 | * unpopulates the page array backing it. |
---|
1270 | 1375 | */ |
---|
1271 | | -static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) |
---|
| 1376 | +static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm) |
---|
1272 | 1377 | { |
---|
1273 | | - struct amdgpu_device *adev; |
---|
1274 | 1378 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1275 | | - bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); |
---|
| 1379 | + struct amdgpu_device *adev; |
---|
1276 | 1380 | |
---|
1277 | 1381 | if (gtt && gtt->userptr) { |
---|
1278 | 1382 | amdgpu_ttm_tt_set_user_pages(ttm, NULL); |
---|
.. | .. |
---|
1282 | 1386 | return; |
---|
1283 | 1387 | } |
---|
1284 | 1388 | |
---|
1285 | | - if (slave) |
---|
| 1389 | + if (ttm->sg && gtt->gobj->import_attach) { |
---|
| 1390 | + struct dma_buf_attachment *attach; |
---|
| 1391 | + |
---|
| 1392 | + attach = gtt->gobj->import_attach; |
---|
| 1393 | + dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); |
---|
| 1394 | + ttm->sg = NULL; |
---|
| 1395 | + return; |
---|
| 1396 | + } |
---|
| 1397 | + |
---|
| 1398 | + if (ttm->page_flags & TTM_PAGE_FLAG_SG) |
---|
1286 | 1399 | return; |
---|
1287 | 1400 | |
---|
1288 | | - adev = amdgpu_ttm_adev(ttm->bdev); |
---|
| 1401 | + adev = amdgpu_ttm_adev(bdev); |
---|
1289 | 1402 | |
---|
1290 | 1403 | #ifdef CONFIG_SWIOTLB |
---|
1291 | 1404 | if (adev->need_swiotlb && swiotlb_nr_tbl()) { |
---|
.. | .. |
---|
1302 | 1415 | * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current |
---|
1303 | 1416 | * task |
---|
1304 | 1417 | * |
---|
1305 | | - * @ttm: The ttm_tt object to bind this userptr object to |
---|
| 1418 | + * @bo: The ttm_buffer_object to bind this userptr to |
---|
1306 | 1419 | * @addr: The address in the current tasks VM space to use |
---|
1307 | 1420 | * @flags: Requirements of userptr object. |
---|
1308 | 1421 | * |
---|
1309 | 1422 | * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages |
---|
1310 | 1423 | * to current task |
---|
1311 | 1424 | */ |
---|
1312 | | -int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, |
---|
1313 | | - uint32_t flags) |
---|
| 1425 | +int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, |
---|
| 1426 | + uint64_t addr, uint32_t flags) |
---|
1314 | 1427 | { |
---|
1315 | | - struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
| 1428 | + struct amdgpu_ttm_tt *gtt; |
---|
1316 | 1429 | |
---|
1317 | | - if (gtt == NULL) |
---|
1318 | | - return -EINVAL; |
---|
| 1430 | + if (!bo->ttm) { |
---|
| 1431 | + /* TODO: We want a separate TTM object type for userptrs */ |
---|
| 1432 | + bo->ttm = amdgpu_ttm_tt_create(bo, 0); |
---|
| 1433 | + if (bo->ttm == NULL) |
---|
| 1434 | + return -ENOMEM; |
---|
| 1435 | + } |
---|
1319 | 1436 | |
---|
| 1437 | + gtt = (void*)bo->ttm; |
---|
1320 | 1438 | gtt->userptr = addr; |
---|
1321 | 1439 | gtt->userflags = flags; |
---|
1322 | 1440 | |
---|
.. | .. |
---|
1324 | 1442 | put_task_struct(gtt->usertask); |
---|
1325 | 1443 | gtt->usertask = current->group_leader; |
---|
1326 | 1444 | get_task_struct(gtt->usertask); |
---|
1327 | | - |
---|
1328 | | - spin_lock_init(&gtt->guptasklock); |
---|
1329 | | - INIT_LIST_HEAD(&gtt->guptasks); |
---|
1330 | | - atomic_set(&gtt->mmu_invalidations, 0); |
---|
1331 | | - gtt->last_set_pages = 0; |
---|
1332 | 1445 | |
---|
1333 | 1446 | return 0; |
---|
1334 | 1447 | } |
---|
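
Allocating bo->ttm lazily here lets the GEM userptr ioctl attach a user address range to a BO that was never populated. A hedged sketch of the caller side (function name hypothetical):

    /* Hedged sketch: attach a userspace address range to a BO. The
     * ttm_tt is allocated on first use, so this works on a fresh BO. */
    static int example_attach_userptr(struct ttm_buffer_object *bo,
                                      uint64_t user_addr, uint32_t flags)
    {
        int r;

        r = amdgpu_ttm_tt_set_userptr(bo, user_addr, flags);
        if (r)
            return r;   /* -ENOMEM if the ttm_tt allocation failed */

        /* pages are pinned later, when the BO is bound for the GPU */
        return 0;
    }
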
.. | .. |
---|
1358 | 1471 | unsigned long end) |
---|
1359 | 1472 | { |
---|
1360 | 1473 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1361 | | - struct amdgpu_ttm_gup_task_list *entry; |
---|
1362 | 1474 | unsigned long size; |
---|
1363 | 1475 | |
---|
1364 | 1476 | if (gtt == NULL || !gtt->userptr) |
---|
.. | .. |
---|
1371 | 1483 | if (gtt->userptr > end || gtt->userptr + size <= start) |
---|
1372 | 1484 | return false; |
---|
1373 | 1485 | |
---|
1374 | | - /* Search the lists of tasks that hold this mapping and see |
---|
1375 | | - * if current is one of them. If it is return false. |
---|
1376 | | - */ |
---|
1377 | | - spin_lock(&gtt->guptasklock); |
---|
1378 | | - list_for_each_entry(entry, &gtt->guptasks, list) { |
---|
1379 | | - if (entry->task == current) { |
---|
1380 | | - spin_unlock(&gtt->guptasklock); |
---|
1381 | | - return false; |
---|
1382 | | - } |
---|
1383 | | - } |
---|
1384 | | - spin_unlock(&gtt->guptasklock); |
---|
1385 | | - |
---|
1386 | | - atomic_inc(&gtt->mmu_invalidations); |
---|
1387 | | - |
---|
1388 | 1486 | return true; |
---|
1389 | 1487 | } |
---|
1390 | 1488 | |
---|
1391 | 1489 | /** |
---|
1392 | | - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? |
---|
| 1490 | + * amdgpu_ttm_tt_is_userptr - Are the pages backed by userptr? |
---|
1393 | 1491 | */ |
---|
1394 | | -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, |
---|
1395 | | - int *last_invalidated) |
---|
1396 | | -{ |
---|
1397 | | - struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1398 | | - int prev_invalidated = *last_invalidated; |
---|
1399 | | - |
---|
1400 | | - *last_invalidated = atomic_read(&gtt->mmu_invalidations); |
---|
1401 | | - return prev_invalidated != *last_invalidated; |
---|
1402 | | -} |
---|
1403 | | - |
---|
1404 | | -/** |
---|
1405 | | - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object |
---|
1406 | | - * been invalidated since the last time they've been set? |
---|
1407 | | - */ |
---|
1408 | | -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) |
---|
| 1492 | +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) |
---|
1409 | 1493 | { |
---|
1410 | 1494 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
---|
1411 | 1495 | |
---|
1412 | 1496 | if (gtt == NULL || !gtt->userptr) |
---|
1413 | 1497 | return false; |
---|
1414 | 1498 | |
---|
1415 | | - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; |
---|
| 1499 | + return true; |
---|
1416 | 1500 | } |
---|
1417 | 1501 | |
---|
1418 | 1502 | /** |
---|
.. | .. |
---|
1429 | 1513 | } |
---|
1430 | 1514 | |
---|
1431 | 1515 | /** |
---|
1432 | | - * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object |
---|
| 1516 | + * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object |
---|
1433 | 1517 | * |
---|
1434 | 1518 | * @ttm: The ttm_tt object to compute the flags for |
---|
1435 | 1519 | * @mem: The memory registry backing this ttm_tt object |
---|
| 1520 | + * |
---|
| 1521 | + * Figure out the flags to use for a VM PDE (Page Directory Entry). |
---|
1436 | 1522 | */ |
---|
1437 | | -uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, |
---|
1438 | | - struct ttm_mem_reg *mem) |
---|
| 1523 | +uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) |
---|
1439 | 1524 | { |
---|
1440 | 1525 | uint64_t flags = 0; |
---|
1441 | 1526 | |
---|
.. | .. |
---|
1448 | 1533 | if (ttm->caching_state == tt_cached) |
---|
1449 | 1534 | flags |= AMDGPU_PTE_SNOOPED; |
---|
1450 | 1535 | } |
---|
| 1536 | + |
---|
| 1537 | + return flags; |
---|
| 1538 | +} |
---|
| 1539 | + |
---|
| 1540 | +/** |
---|
| 1541 | + * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object |
---|
| 1542 | + * |
---|
| 1543 | + * @ttm: The ttm_tt object to compute the flags for |
---|
| 1544 | + * @mem: The memory registry backing this ttm_tt object |
---|
| 1545 | + * |
---|
| 1546 | + * Figure out the flags to use for a VM PTE (Page Table Entry). |
---|
| 1547 | + */ |
---|
| 1548 | +uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, |
---|
| 1549 | + struct ttm_resource *mem) |
---|
| 1550 | +{ |
---|
| 1551 | + uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem); |
---|
1451 | 1552 | |
---|
1452 | 1553 | flags |= adev->gart.gart_pte_flags; |
---|
1453 | 1554 | flags |= AMDGPU_PTE_READABLE; |
---|
.. | .. |
---|
1472 | 1573 | { |
---|
1473 | 1574 | unsigned long num_pages = bo->mem.num_pages; |
---|
1474 | 1575 | struct drm_mm_node *node = bo->mem.mm_node; |
---|
1475 | | - struct reservation_object_list *flist; |
---|
| 1576 | + struct dma_resv_list *flist; |
---|
1476 | 1577 | struct dma_fence *f; |
---|
1477 | 1578 | int i; |
---|
| 1579 | + |
---|
| 1580 | + if (bo->type == ttm_bo_type_kernel && |
---|
| 1581 | + !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo))) |
---|
| 1582 | + return false; |
---|
1478 | 1583 | |
---|
1479 | 1584 | /* If bo is a KFD BO, check if the bo belongs to the current process. |
---|
1480 | 1585 | * If true, then return false as any KFD process needs all its BOs to |
---|
1481 | 1586 | * be resident to run successfully |
---|
1482 | 1587 | */ |
---|
1483 | | - flist = reservation_object_get_list(bo->resv); |
---|
| 1588 | + flist = dma_resv_get_list(bo->base.resv); |
---|
1484 | 1589 | if (flist) { |
---|
1485 | 1590 | for (i = 0; i < flist->shared_count; ++i) { |
---|
1486 | 1591 | f = rcu_dereference_protected(flist->shared[i], |
---|
1487 | | - reservation_object_held(bo->resv)); |
---|
| 1592 | + dma_resv_held(bo->base.resv)); |
---|
1488 | 1593 | if (amdkfd_fence_check_mm(f, current->mm)) |
---|
1489 | 1594 | return false; |
---|
1490 | 1595 | } |
---|
.. | .. |
---|
1492 | 1597 | |
---|
1493 | 1598 | switch (bo->mem.mem_type) { |
---|
1494 | 1599 | case TTM_PL_TT: |
---|
| 1600 | + if (amdgpu_bo_is_amdgpu_bo(bo) && |
---|
| 1601 | + amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) |
---|
| 1602 | + return false; |
---|
1495 | 1603 | return true; |
---|
1496 | 1604 | |
---|
1497 | 1605 | case TTM_PL_VRAM: |
---|
.. | .. |
---|
1540 | 1648 | if (bo->mem.mem_type != TTM_PL_VRAM) |
---|
1541 | 1649 | return -EIO; |
---|
1542 | 1650 | |
---|
1543 | | - nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); |
---|
1544 | | - pos = (nodes->start << PAGE_SHIFT) + offset; |
---|
| 1651 | + pos = offset; |
---|
| 1652 | + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos); |
---|
| 1653 | + pos += (nodes->start << PAGE_SHIFT); |
---|
1545 | 1654 | |
---|
1546 | 1655 | while (len && pos < adev->gmc.mc_vram_size) { |
---|
1547 | 1656 | uint64_t aligned_pos = pos & ~(uint64_t)3; |
---|
1548 | | - uint32_t bytes = 4 - (pos & 3); |
---|
| 1657 | + uint64_t bytes = 4 - (pos & 3); |
---|
1549 | 1658 | uint32_t shift = (pos & 3) * 8; |
---|
1550 | 1659 | uint32_t mask = 0xffffffff << shift; |
---|
1551 | 1660 | |
---|
.. | .. |
---|
1554 | 1663 | bytes = len; |
---|
1555 | 1664 | } |
---|
1556 | 1665 | |
---|
1557 | | - spin_lock_irqsave(&adev->mmio_idx_lock, flags); |
---|
1558 | | - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); |
---|
1559 | | - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); |
---|
1560 | | - if (!write || mask != 0xffffffff) |
---|
1561 | | - value = RREG32_NO_KIQ(mmMM_DATA); |
---|
1562 | | - if (write) { |
---|
1563 | | - value &= ~mask; |
---|
1564 | | - value |= (*(uint32_t *)buf << shift) & mask; |
---|
1565 | | - WREG32_NO_KIQ(mmMM_DATA, value); |
---|
1566 | | - } |
---|
1567 | | - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); |
---|
1568 | | - if (!write) { |
---|
1569 | | - value = (value & mask) >> shift; |
---|
1570 | | - memcpy(buf, &value, bytes); |
---|
| 1666 | + if (mask != 0xffffffff) { |
---|
| 1667 | + spin_lock_irqsave(&adev->mmio_idx_lock, flags); |
---|
| 1668 | + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); |
---|
| 1669 | + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); |
---|
| 1670 | + if (!write || mask != 0xffffffff) |
---|
| 1671 | + value = RREG32_NO_KIQ(mmMM_DATA); |
---|
| 1672 | + if (write) { |
---|
| 1673 | + value &= ~mask; |
---|
| 1674 | + value |= (*(uint32_t *)buf << shift) & mask; |
---|
| 1675 | + WREG32_NO_KIQ(mmMM_DATA, value); |
---|
| 1676 | + } |
---|
| 1677 | + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); |
---|
| 1678 | + if (!write) { |
---|
| 1679 | + value = (value & mask) >> shift; |
---|
| 1680 | + memcpy(buf, &value, bytes); |
---|
| 1681 | + } |
---|
| 1682 | + } else { |
---|
| 1683 | + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; |
---|
| 1684 | + bytes = min(bytes - pos, (uint64_t)len & ~0x3ull); |
---|
| 1685 | + |
---|
| 1686 | + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, |
---|
| 1687 | + bytes, write); |
---|
1571 | 1688 | } |
---|
1572 | 1689 | |
---|
1573 | 1690 | ret += bytes; |
---|
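
After the change above, the MM_INDEX/MM_DATA register path is taken only for the unaligned head and tail of an access (mask != 0xffffffff); aligned spans are handed to amdgpu_device_vram_access() in bulk. The partial-dword read-modify-write is easiest to see with numbers; a self-contained userspace demo with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    /* Writing 2 bytes at VRAM byte offset 6 touches the dword at offset 4:
     * read it, clear the upper half, merge the new bytes, write it back. */
    int main(void)
    {
        uint64_t pos = 6;
        uint64_t aligned_pos = pos & ~(uint64_t)3;  /* 4 */
        uint64_t bytes = 4 - (pos & 3);             /* 2 */
        uint32_t shift = (pos & 3) * 8;             /* 16 */
        uint32_t mask = 0xffffffffu << shift;       /* 0xffff0000 */

        uint32_t dword = 0x11223344;    /* pretend MM_DATA readback */
        uint32_t src = 0xbeef;          /* the two bytes to store */

        dword &= ~mask;                 /* 0x00003344 */
        dword |= (src << shift) & mask; /* 0xbeef3344 */

        printf("pos=%llu aligned=%llu bytes=%llu -> 0x%08x\n",
               (unsigned long long)pos, (unsigned long long)aligned_pos,
               (unsigned long long)bytes, dword);
        return 0;
    }
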
.. | .. |
---|
1587 | 1704 | .ttm_tt_create = &amdgpu_ttm_tt_create, |
---|
1588 | 1705 | .ttm_tt_populate = &amdgpu_ttm_tt_populate, |
---|
1589 | 1706 | .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, |
---|
1590 | | - .invalidate_caches = &amdgpu_invalidate_caches, |
---|
1591 | | - .init_mem_type = &amdgpu_init_mem_type, |
---|
| 1707 | + .ttm_tt_bind = &amdgpu_ttm_backend_bind, |
---|
| 1708 | + .ttm_tt_unbind = &amdgpu_ttm_backend_unbind, |
---|
| 1709 | + .ttm_tt_destroy = &amdgpu_ttm_backend_destroy, |
---|
1592 | 1710 | .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, |
---|
1593 | 1711 | .evict_flags = &amdgpu_evict_flags, |
---|
1594 | 1712 | .move = &amdgpu_bo_move, |
---|
1595 | 1713 | .verify_access = &amdgpu_verify_access, |
---|
1596 | 1714 | .move_notify = &amdgpu_bo_move_notify, |
---|
| 1715 | + .release_notify = &amdgpu_bo_release_notify, |
---|
1597 | 1716 | .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, |
---|
1598 | 1717 | .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, |
---|
1599 | | - .io_mem_free = &amdgpu_ttm_io_mem_free, |
---|
1600 | 1718 | .io_mem_pfn = amdgpu_ttm_io_mem_pfn, |
---|
1601 | | - .access_memory = &amdgpu_ttm_access_memory |
---|
| 1719 | + .access_memory = &amdgpu_ttm_access_memory, |
---|
| 1720 | + .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify |
---|
1602 | 1721 | }; |
---|
1603 | 1722 | |
---|
1604 | 1723 | /* |
---|
.. | .. |
---|
1613 | 1732 | */ |
---|
1614 | 1733 | static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) |
---|
1615 | 1734 | { |
---|
1616 | | - amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, |
---|
1617 | | - NULL, &adev->fw_vram_usage.va); |
---|
| 1735 | + amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo, |
---|
| 1736 | + NULL, &adev->mman.fw_vram_usage_va); |
---|
1618 | 1737 | } |
---|
1619 | 1738 | |
---|
1620 | 1739 | /** |
---|
.. | .. |
---|
1626 | 1745 | */ |
---|
1627 | 1746 | static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) |
---|
1628 | 1747 | { |
---|
1629 | | - struct ttm_operation_ctx ctx = { false, false }; |
---|
1630 | | - struct amdgpu_bo_param bp; |
---|
1631 | | - int r = 0; |
---|
1632 | | - int i; |
---|
1633 | | - u64 vram_size = adev->gmc.visible_vram_size; |
---|
1634 | | - u64 offset = adev->fw_vram_usage.start_offset; |
---|
1635 | | - u64 size = adev->fw_vram_usage.size; |
---|
1636 | | - struct amdgpu_bo *bo; |
---|
| 1748 | + uint64_t vram_size = adev->gmc.visible_vram_size; |
---|
1637 | 1749 | |
---|
1638 | | - memset(&bp, 0, sizeof(bp)); |
---|
1639 | | - bp.size = adev->fw_vram_usage.size; |
---|
1640 | | - bp.byte_align = PAGE_SIZE; |
---|
1641 | | - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; |
---|
1642 | | - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | |
---|
1643 | | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; |
---|
1644 | | - bp.type = ttm_bo_type_kernel; |
---|
1645 | | - bp.resv = NULL; |
---|
1646 | | - adev->fw_vram_usage.va = NULL; |
---|
1647 | | - adev->fw_vram_usage.reserved_bo = NULL; |
---|
| 1750 | + adev->mman.fw_vram_usage_va = NULL; |
---|
| 1751 | + adev->mman.fw_vram_usage_reserved_bo = NULL; |
---|
1648 | 1752 | |
---|
1649 | | - if (adev->fw_vram_usage.size > 0 && |
---|
1650 | | - adev->fw_vram_usage.size <= vram_size) { |
---|
| 1753 | + if (adev->mman.fw_vram_usage_size == 0 || |
---|
| 1754 | + adev->mman.fw_vram_usage_size > vram_size) |
---|
| 1755 | + return 0; |
---|
1651 | 1756 | |
---|
1652 | | - r = amdgpu_bo_create(adev, &bp, |
---|
1653 | | - &adev->fw_vram_usage.reserved_bo); |
---|
1654 | | - if (r) |
---|
1655 | | - goto error_create; |
---|
1656 | | - |
---|
1657 | | - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); |
---|
1658 | | - if (r) |
---|
1659 | | - goto error_reserve; |
---|
1660 | | - |
---|
1661 | | - /* remove the original mem node and create a new one at the |
---|
1662 | | - * request position |
---|
1663 | | - */ |
---|
1664 | | - bo = adev->fw_vram_usage.reserved_bo; |
---|
1665 | | - offset = ALIGN(offset, PAGE_SIZE); |
---|
1666 | | - for (i = 0; i < bo->placement.num_placement; ++i) { |
---|
1667 | | - bo->placements[i].fpfn = offset >> PAGE_SHIFT; |
---|
1668 | | - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; |
---|
1669 | | - } |
---|
1670 | | - |
---|
1671 | | - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); |
---|
1672 | | - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, |
---|
1673 | | - &bo->tbo.mem, &ctx); |
---|
1674 | | - if (r) |
---|
1675 | | - goto error_pin; |
---|
1676 | | - |
---|
1677 | | - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, |
---|
1678 | | - AMDGPU_GEM_DOMAIN_VRAM, |
---|
1679 | | - adev->fw_vram_usage.start_offset, |
---|
1680 | | - (adev->fw_vram_usage.start_offset + |
---|
1681 | | - adev->fw_vram_usage.size)); |
---|
1682 | | - if (r) |
---|
1683 | | - goto error_pin; |
---|
1684 | | - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, |
---|
1685 | | - &adev->fw_vram_usage.va); |
---|
1686 | | - if (r) |
---|
1687 | | - goto error_kmap; |
---|
1688 | | - |
---|
1689 | | - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); |
---|
1690 | | - } |
---|
1691 | | - return r; |
---|
1692 | | - |
---|
1693 | | -error_kmap: |
---|
1694 | | - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); |
---|
1695 | | -error_pin: |
---|
1696 | | - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); |
---|
1697 | | -error_reserve: |
---|
1698 | | - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); |
---|
1699 | | -error_create: |
---|
1700 | | - adev->fw_vram_usage.va = NULL; |
---|
1701 | | - adev->fw_vram_usage.reserved_bo = NULL; |
---|
1702 | | - return r; |
---|
| 1757 | + return amdgpu_bo_create_kernel_at(adev, |
---|
| 1758 | + adev->mman.fw_vram_usage_start_offset, |
---|
| 1759 | + adev->mman.fw_vram_usage_size, |
---|
| 1760 | + AMDGPU_GEM_DOMAIN_VRAM, |
---|
| 1761 | + &adev->mman.fw_vram_usage_reserved_bo, |
---|
| 1762 | + &adev->mman.fw_vram_usage_va); |
---|
1703 | 1763 | } |
---|
| 1764 | + |
---|
| 1765 | +/* |
---|
| 1766 | + * Memory training reservation functions |
---|
| 1767 | + */ |
---|
| 1768 | + |
---|
| 1769 | +/** |
---|
| 1770 | + * amdgpu_ttm_training_reserve_vram_fini - free VRAM reserved for memory training |
---|
| 1771 | + * |
---|
| 1772 | + * @adev: amdgpu_device pointer |
---|
| 1773 | + * |
---|
| 1774 | + * Free the VRAM reserved for memory training, if it was reserved. |
---|
| 1775 | + */ |
---|
| 1776 | +static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) |
---|
| 1777 | +{ |
---|
| 1778 | + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
---|
| 1779 | + |
---|
| 1780 | + ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; |
---|
| 1781 | + amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); |
---|
| 1782 | + ctx->c2p_bo = NULL; |
---|
| 1783 | + |
---|
| 1784 | + return 0; |
---|
| 1785 | +} |
---|
| 1786 | + |
---|
| 1787 | +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) |
---|
| 1788 | +{ |
---|
| 1789 | + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
---|
| 1790 | + |
---|
| 1791 | + memset(ctx, 0, sizeof(*ctx)); |
---|
| 1792 | + |
---|
| 1793 | + ctx->c2p_train_data_offset = |
---|
| 1794 | + ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M); |
---|
| 1795 | + ctx->p2c_train_data_offset = |
---|
| 1796 | + (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); |
---|
| 1797 | + ctx->train_data_size = |
---|
| 1798 | + GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; |
---|
| 1799 | + |
---|
| 1800 | + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", |
---|
| 1801 | + ctx->train_data_size, |
---|
| 1802 | + ctx->p2c_train_data_offset, |
---|
| 1803 | + ctx->c2p_train_data_offset); |
---|
| 1804 | +} |
---|
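
The block above places the C2P training buffer 1 MiB below the PSP-protected TMR at the top of VRAM, rounded up to a 1 MiB boundary. A worked example with assumed sizes (8 GiB of VRAM and a 4 MiB TMR; neither number is read from real hardware):

    #include <stdint.h>
    #include <stdio.h>

    #define SZ_1M          0x100000ULL
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        uint64_t mc_vram_size = 8ULL << 30;       /* assumed: 8 GiB */
        uint64_t discovery_tmr_size = 4 * SZ_1M;  /* assumed: 4 MiB */

        uint64_t c2p = ALIGN_UP(mc_vram_size - discovery_tmr_size - SZ_1M,
                                SZ_1M);

        /* 0x1ffb00000: 1 MiB of headroom below the TMR block */
        printf("c2p_train_data_offset = 0x%llx\n", (unsigned long long)c2p);
        return 0;
    }
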
| 1805 | + |
---|
| 1806 | +/* |
---|
| 1807 | + * reserve TMR memory at the top of VRAM which holds |
---|
| 1808 | + * IP Discovery data and is protected by PSP. |
---|
| 1809 | + */ |
---|
| 1810 | +static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) |
---|
| 1811 | +{ |
---|
| 1812 | + int ret; |
---|
| 1813 | + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
---|
| 1814 | + bool mem_train_support = false; |
---|
| 1815 | + |
---|
| 1816 | + if (!amdgpu_sriov_vf(adev)) { |
---|
| 1817 | + ret = amdgpu_mem_train_support(adev); |
---|
| 1818 | + if (ret == 1) |
---|
| 1819 | + mem_train_support = true; |
---|
| 1820 | + else if (ret == -1) |
---|
| 1821 | + return -EINVAL; |
---|
| 1822 | + else |
---|
| 1823 | + DRM_DEBUG("memory training does not support!\n"); |
---|
| 1824 | + } |
---|
| 1825 | + |
---|
| 1826 | + /* |
---|
| 1827 | + * Query the reserved TMR size through atom firmwareinfo for Sienna_Cichlid and onwards for all |
---|
| 1828 | + * the use cases (IP discovery/G6 memory training/profiling/diagnostic data, etc.) |
---|
| 1829 | + * |
---|
| 1830 | + * Otherwise, fall back to the legacy approach of checking and reserving a TMR block for IP |
---|
| 1831 | + * discovery data and G6 memory training data, respectively. |
---|
| 1832 | + */ |
---|
| 1833 | + adev->mman.discovery_tmr_size = |
---|
| 1834 | + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); |
---|
| 1835 | + if (!adev->mman.discovery_tmr_size) |
---|
| 1836 | + adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET; |
---|
| 1837 | + |
---|
| 1838 | + if (mem_train_support) { |
---|
| 1839 | + /* reserve vram for mem train according to TMR location */ |
---|
| 1840 | + amdgpu_ttm_training_data_block_init(adev); |
---|
| 1841 | + ret = amdgpu_bo_create_kernel_at(adev, |
---|
| 1842 | + ctx->c2p_train_data_offset, |
---|
| 1843 | + ctx->train_data_size, |
---|
| 1844 | + AMDGPU_GEM_DOMAIN_VRAM, |
---|
| 1845 | + &ctx->c2p_bo, |
---|
| 1846 | + NULL); |
---|
| 1847 | + if (ret) { |
---|
| 1848 | + DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); |
---|
| 1849 | + amdgpu_ttm_training_reserve_vram_fini(adev); |
---|
| 1850 | + return ret; |
---|
| 1851 | + } |
---|
| 1852 | + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; |
---|
| 1853 | + } |
---|
| 1854 | + |
---|
| 1855 | + ret = amdgpu_bo_create_kernel_at(adev, |
---|
| 1856 | + adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, |
---|
| 1857 | + adev->mman.discovery_tmr_size, |
---|
| 1858 | + AMDGPU_GEM_DOMAIN_VRAM, |
---|
| 1859 | + &adev->mman.discovery_memory, |
---|
| 1860 | + NULL); |
---|
| 1861 | + if (ret) { |
---|
| 1862 | + DRM_ERROR("alloc tmr failed(%d)!\n", ret); |
---|
| 1863 | + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); |
---|
| 1864 | + return ret; |
---|
| 1865 | + } |
---|
| 1866 | + |
---|
| 1867 | + return 0; |
---|
| 1868 | +} |
---|
| 1869 | + |
---|
1704 | 1870 | /** |
---|
1705 | 1871 | * amdgpu_ttm_init - Init the memory management (ttm) as well as various |
---|
1706 | 1872 | * gtt/vram related fields. |
---|
.. | .. |
---|
1716 | 1882 | int r; |
---|
1717 | 1883 | u64 vis_vram_limit; |
---|
1718 | 1884 | |
---|
1719 | | - /* initialize global references for vram/gtt */ |
---|
1720 | | - r = amdgpu_ttm_global_init(adev); |
---|
1721 | | - if (r) { |
---|
1722 | | - return r; |
---|
1723 | | - } |
---|
| 1885 | + mutex_init(&adev->mman.gtt_window_lock); |
---|
| 1886 | + |
---|
1724 | 1887 | /* No others user of address space so set it to 0 */ |
---|
1725 | 1888 | r = ttm_bo_device_init(&adev->mman.bdev, |
---|
1726 | | - adev->mman.bo_global_ref.ref.object, |
---|
1727 | 1889 | &amdgpu_bo_driver, |
---|
1728 | | - adev->ddev->anon_inode->i_mapping, |
---|
1729 | | - DRM_FILE_PAGE_OFFSET, |
---|
1730 | | - adev->need_dma32); |
---|
| 1890 | + adev_to_drm(adev)->anon_inode->i_mapping, |
---|
| 1891 | + adev_to_drm(adev)->vma_offset_manager, |
---|
| 1892 | + dma_addressing_limited(adev->dev)); |
---|
1731 | 1893 | if (r) { |
---|
1732 | 1894 | DRM_ERROR("failed initializing buffer object driver(%d).\n", r); |
---|
1733 | 1895 | return r; |
---|
.. | .. |
---|
1738 | 1900 | adev->mman.bdev.no_retry = true; |
---|
1739 | 1901 | |
---|
1740 | 1902 | /* Initialize VRAM pool with all of VRAM divided into pages */ |
---|
1741 | | - r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, |
---|
1742 | | - adev->gmc.real_vram_size >> PAGE_SHIFT); |
---|
| 1903 | + r = amdgpu_vram_mgr_init(adev); |
---|
1743 | 1904 | if (r) { |
---|
1744 | 1905 | DRM_ERROR("Failed initializing VRAM heap.\n"); |
---|
1745 | 1906 | return r; |
---|
.. | .. |
---|
1767 | 1928 | return r; |
---|
1768 | 1929 | } |
---|
1769 | 1930 | |
---|
| 1931 | + /* |
---|
| 1932 | + * Only NAVI10 and onward ASICs support IP discovery. |
---|
| 1933 | + * If IP discovery is enabled, a block of memory should be |
---|
| 1934 | + * reserved for IP discovery. |
---|
| 1935 | + */ |
---|
| 1936 | + if (adev->mman.discovery_bin) { |
---|
| 1937 | + r = amdgpu_ttm_reserve_tmr(adev); |
---|
| 1938 | + if (r) |
---|
| 1939 | + return r; |
---|
| 1940 | + } |
---|
| 1941 | + |
---|
1770 | 1942 | /* allocate memory as required for VGA |
---|
1771 | 1943 | * This is used for VGA emulation and pre-OS scanout buffers to |
---|
1772 | 1944 | * avoid display artifacts while transitioning between pre-OS |
---|
1773 | 1945 | * and driver. */ |
---|
1774 | | - if (adev->gmc.stolen_size) { |
---|
1775 | | - r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, |
---|
1776 | | - AMDGPU_GEM_DOMAIN_VRAM, |
---|
1777 | | - &adev->stolen_vga_memory, |
---|
1778 | | - NULL, NULL); |
---|
1779 | | - if (r) |
---|
1780 | | - return r; |
---|
1781 | | - } |
---|
| 1946 | + r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, |
---|
| 1947 | + AMDGPU_GEM_DOMAIN_VRAM, |
---|
| 1948 | + &adev->mman.stolen_vga_memory, |
---|
| 1949 | + NULL); |
---|
| 1950 | + if (r) |
---|
| 1951 | + return r; |
---|
| 1952 | + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, |
---|
| 1953 | + adev->mman.stolen_extended_size, |
---|
| 1954 | + AMDGPU_GEM_DOMAIN_VRAM, |
---|
| 1955 | + &adev->mman.stolen_extended_memory, |
---|
| 1956 | + NULL); |
---|
| 1957 | + if (r) |
---|
| 1958 | + return r; |
---|
| 1959 | + |
---|
1782 | 1960 | DRM_INFO("amdgpu: %uM of VRAM memory ready\n", |
---|
1783 | 1961 | (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); |
---|
1784 | 1962 | |
---|
.. | .. |
---|
1796 | 1974 | gtt_size = (uint64_t)amdgpu_gtt_size << 20; |
---|
1797 | 1975 | |
---|
1798 | 1976 | /* Initialize GTT memory pool */ |
---|
1799 | | - r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); |
---|
| 1977 | + r = amdgpu_gtt_mgr_init(adev, gtt_size); |
---|
1800 | 1978 | if (r) { |
---|
1801 | 1979 | DRM_ERROR("Failed initializing GTT heap.\n"); |
---|
1802 | 1980 | return r; |
---|
.. | .. |
---|
1805 | 1983 | (unsigned)(gtt_size / (1024 * 1024))); |
---|
1806 | 1984 | |
---|
1807 | 1985 | /* Initialize various on-chip memory pools */ |
---|
1808 | | - adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; |
---|
1809 | | - adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; |
---|
1810 | | - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; |
---|
1811 | | - adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT; |
---|
1812 | | - adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT; |
---|
1813 | | - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT; |
---|
1814 | | - adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT; |
---|
1815 | | - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT; |
---|
1816 | | - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT; |
---|
1817 | | - /* GDS Memory */ |
---|
1818 | | - if (adev->gds.mem.total_size) { |
---|
1819 | | - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, |
---|
1820 | | - adev->gds.mem.total_size >> PAGE_SHIFT); |
---|
1821 | | - if (r) { |
---|
1822 | | - DRM_ERROR("Failed initializing GDS heap.\n"); |
---|
1823 | | - return r; |
---|
1824 | | - } |
---|
1825 | | - } |
---|
1826 | | - |
---|
1827 | | - /* GWS */ |
---|
1828 | | - if (adev->gds.gws.total_size) { |
---|
1829 | | - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, |
---|
1830 | | - adev->gds.gws.total_size >> PAGE_SHIFT); |
---|
1831 | | - if (r) { |
---|
1832 | | - DRM_ERROR("Failed initializing gws heap.\n"); |
---|
1833 | | - return r; |
---|
1834 | | - } |
---|
1835 | | - } |
---|
1836 | | - |
---|
1837 | | - /* OA */ |
---|
1838 | | - if (adev->gds.oa.total_size) { |
---|
1839 | | - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, |
---|
1840 | | - adev->gds.oa.total_size >> PAGE_SHIFT); |
---|
1841 | | - if (r) { |
---|
1842 | | - DRM_ERROR("Failed initializing oa heap.\n"); |
---|
1843 | | - return r; |
---|
1844 | | - } |
---|
1845 | | - } |
---|
1846 | | - |
---|
1847 | | - /* Register debugfs entries for amdgpu_ttm */ |
---|
1848 | | - r = amdgpu_ttm_debugfs_init(adev); |
---|
| 1986 | + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size); |
---|
1849 | 1987 | if (r) { |
---|
1850 | | - DRM_ERROR("Failed to init debugfs\n"); |
---|
| 1988 | + DRM_ERROR("Failed initializing GDS heap.\n"); |
---|
1851 | 1989 | return r; |
---|
1852 | 1990 | } |
---|
| 1991 | + |
---|
| 1992 | + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size); |
---|
| 1993 | + if (r) { |
---|
| 1994 | + DRM_ERROR("Failed initializing gws heap.\n"); |
---|
| 1995 | + return r; |
---|
| 1996 | + } |
---|
| 1997 | + |
---|
| 1998 | + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size); |
---|
| 1999 | + if (r) { |
---|
| 2000 | + DRM_ERROR("Failed initializing oa heap.\n"); |
---|
| 2001 | + return r; |
---|
| 2002 | + } |
---|
| 2003 | + |
---|
1853 | 2004 | return 0; |
---|
1854 | 2005 | } |
---|
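
The three open-coded ttm_bo_init_mm() calls are replaced by amdgpu_ttm_init_on_chip(), whose body is not visible in this hunk. A plausible implementation, assuming it is a thin wrapper over TTM's range manager (a sketch, not the verified body):

    static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
                                       unsigned int type,
                                       uint64_t size_in_page)
    {
        /* GDS/GWS/OA pools have no system-memory backing (use_tt = false) */
        return ttm_range_man_init(&adev->mman.bdev, type, false,
                                  size_in_page);
    }
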
1855 | 2006 | |
---|
.. | .. |
---|
1859 | 2010 | void amdgpu_ttm_late_init(struct amdgpu_device *adev) |
---|
1860 | 2011 | { |
---|
1861 | 2012 | /* return the VGA stolen memory (if any) back to VRAM */ |
---|
1862 | | - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); |
---|
| 2013 | + if (!adev->mman.keep_stolen_vga_memory) |
---|
| 2014 | + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); |
---|
| 2015 | + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); |
---|
1863 | 2016 | } |
---|
1864 | 2017 | |
---|
1865 | 2018 | /** |
---|
.. | .. |
---|
1870 | 2023 | if (!adev->mman.initialized) |
---|
1871 | 2024 | return; |
---|
1872 | 2025 | |
---|
1873 | | - amdgpu_ttm_debugfs_fini(adev); |
---|
| 2026 | + amdgpu_ttm_training_reserve_vram_fini(adev); |
---|
| 2027 | + /* return the stolen vga memory back to VRAM */ |
---|
| 2028 | + if (adev->mman.keep_stolen_vga_memory) |
---|
| 2029 | + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); |
---|
| 2030 | + /* return the IP Discovery TMR memory back to VRAM */ |
---|
| 2031 | + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); |
---|
1874 | 2032 | amdgpu_ttm_fw_reserve_vram_fini(adev); |
---|
| 2033 | + |
---|
1875 | 2034 | if (adev->mman.aper_base_kaddr) |
---|
1876 | 2035 | iounmap(adev->mman.aper_base_kaddr); |
---|
1877 | 2036 | adev->mman.aper_base_kaddr = NULL; |
---|
1878 | 2037 | |
---|
1879 | | - ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); |
---|
1880 | | - ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); |
---|
1881 | | - if (adev->gds.mem.total_size) |
---|
1882 | | - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); |
---|
1883 | | - if (adev->gds.gws.total_size) |
---|
1884 | | - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); |
---|
1885 | | - if (adev->gds.oa.total_size) |
---|
1886 | | - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); |
---|
| 2038 | + amdgpu_vram_mgr_fini(adev); |
---|
| 2039 | + amdgpu_gtt_mgr_fini(adev); |
---|
| 2040 | + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); |
---|
| 2041 | + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); |
---|
| 2042 | + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); |
---|
1887 | 2043 | ttm_bo_device_release(&adev->mman.bdev); |
---|
1888 | | - amdgpu_ttm_global_fini(adev); |
---|
1889 | 2044 | adev->mman.initialized = false; |
---|
1890 | 2045 | DRM_INFO("amdgpu: ttm finalized\n"); |
---|
1891 | 2046 | } |
---|
.. | .. |
---|
1901 | 2056 | */ |
---|
1902 | 2057 | void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) |
---|
1903 | 2058 | { |
---|
1904 | | - struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; |
---|
| 2059 | + struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); |
---|
1905 | 2060 | uint64_t size; |
---|
1906 | 2061 | int r; |
---|
1907 | 2062 | |
---|
1908 | | - if (!adev->mman.initialized || adev->in_gpu_reset || |
---|
| 2063 | + if (!adev->mman.initialized || amdgpu_in_reset(adev) || |
---|
1909 | 2064 | adev->mman.buffer_funcs_enabled == enable) |
---|
1910 | 2065 | return; |
---|
1911 | 2066 | |
---|
1912 | 2067 | if (enable) { |
---|
1913 | 2068 | struct amdgpu_ring *ring; |
---|
1914 | | - struct drm_sched_rq *rq; |
---|
| 2069 | + struct drm_gpu_scheduler *sched; |
---|
1915 | 2070 | |
---|
1916 | 2071 | ring = adev->mman.buffer_funcs_ring; |
---|
1917 | | - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; |
---|
1918 | | - r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL); |
---|
| 2072 | + sched = &ring->sched; |
---|
| 2073 | + r = drm_sched_entity_init(&adev->mman.entity, |
---|
| 2074 | + DRM_SCHED_PRIORITY_KERNEL, &sched, |
---|
| 2075 | + 1, NULL); |
---|
1919 | 2076 | if (r) { |
---|
1920 | 2077 | DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", |
---|
1921 | 2078 | r); |
---|
.. | .. |
---|
1938 | 2095 | |
---|
1939 | 2096 | int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) |
---|
1940 | 2097 | { |
---|
1941 | | - struct drm_file *file_priv; |
---|
1942 | | - struct amdgpu_device *adev; |
---|
| 2098 | + struct drm_file *file_priv = filp->private_data; |
---|
| 2099 | + struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev); |
---|
1943 | 2100 | |
---|
1944 | | - if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) |
---|
1945 | | - return -EINVAL; |
---|
1946 | | - |
---|
1947 | | - file_priv = filp->private_data; |
---|
1948 | | - adev = file_priv->minor->dev->dev_private; |
---|
1949 | 2101 | if (adev == NULL) |
---|
1950 | 2102 | return -EINVAL; |
---|
1951 | 2103 | |
---|
1952 | 2104 | return ttm_bo_mmap(filp, vma, &adev->mman.bdev); |
---|
1953 | 2105 | } |
---|
1954 | 2106 | |
---|
1955 | | -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, |
---|
1956 | | - struct ttm_mem_reg *mem, unsigned num_pages, |
---|
1957 | | - uint64_t offset, unsigned window, |
---|
1958 | | - struct amdgpu_ring *ring, |
---|
1959 | | - uint64_t *addr) |
---|
1960 | | -{ |
---|
1961 | | - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; |
---|
1962 | | - struct amdgpu_device *adev = ring->adev; |
---|
1963 | | - struct ttm_tt *ttm = bo->ttm; |
---|
1964 | | - struct amdgpu_job *job; |
---|
1965 | | - unsigned num_dw, num_bytes; |
---|
1966 | | - dma_addr_t *dma_address; |
---|
1967 | | - struct dma_fence *fence; |
---|
1968 | | - uint64_t src_addr, dst_addr; |
---|
1969 | | - uint64_t flags; |
---|
1970 | | - int r; |
---|
1971 | | - |
---|
1972 | | - BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < |
---|
1973 | | - AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); |
---|
1974 | | - |
---|
1975 | | - *addr = adev->gmc.gart_start; |
---|
1976 | | - *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * |
---|
1977 | | - AMDGPU_GPU_PAGE_SIZE; |
---|
1978 | | - |
---|
1979 | | - num_dw = adev->mman.buffer_funcs->copy_num_dw; |
---|
1980 | | - while (num_dw & 0x7) |
---|
1981 | | - num_dw++; |
---|
1982 | | - |
---|
1983 | | - num_bytes = num_pages * 8; |
---|
1984 | | - |
---|
1985 | | - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); |
---|
1986 | | - if (r) |
---|
1987 | | - return r; |
---|
1988 | | - |
---|
1989 | | - src_addr = num_dw * 4; |
---|
1990 | | - src_addr += job->ibs[0].gpu_addr; |
---|
1991 | | - |
---|
1992 | | - dst_addr = adev->gart.table_addr; |
---|
1993 | | - dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; |
---|
1994 | | - amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, |
---|
1995 | | - dst_addr, num_bytes); |
---|
1996 | | - |
---|
1997 | | - amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
---|
1998 | | - WARN_ON(job->ibs[0].length_dw > num_dw); |
---|
1999 | | - |
---|
2000 | | - dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT]; |
---|
2001 | | - flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); |
---|
2002 | | - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, |
---|
2003 | | - &job->ibs[0].ptr[num_dw]); |
---|
2004 | | - if (r) |
---|
2005 | | - goto error_free; |
---|
2006 | | - |
---|
2007 | | - r = amdgpu_job_submit(job, &adev->mman.entity, |
---|
2008 | | - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); |
---|
2009 | | - if (r) |
---|
2010 | | - goto error_free; |
---|
2011 | | - |
---|
2012 | | - dma_fence_put(fence); |
---|
2013 | | - |
---|
2014 | | - return r; |
---|
2015 | | - |
---|
2016 | | -error_free: |
---|
2017 | | - amdgpu_job_free(job); |
---|
2018 | | - return r; |
---|
2019 | | -} |
---|
2020 | | - |
---|
2021 | 2107 | int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, |
---|
2022 | 2108 | uint64_t dst_offset, uint32_t byte_count, |
---|
2023 | | - struct reservation_object *resv, |
---|
| 2109 | + struct dma_resv *resv, |
---|
2024 | 2110 | struct dma_fence **fence, bool direct_submit, |
---|
2025 | | - bool vm_needs_flush) |
---|
| 2111 | + bool vm_needs_flush, bool tmz) |
---|
2026 | 2112 | { |
---|
| 2113 | + enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : |
---|
| 2114 | + AMDGPU_IB_POOL_DELAYED; |
---|
2027 | 2115 | struct amdgpu_device *adev = ring->adev; |
---|
2028 | 2116 | struct amdgpu_job *job; |
---|
2029 | 2117 | |
---|
.. | .. |
---|
2032 | 2120 | unsigned i; |
---|
2033 | 2121 | int r; |
---|
2034 | 2122 | |
---|
2035 | | - if (direct_submit && !ring->ready) { |
---|
| 2123 | + if (!direct_submit && !ring->sched.ready) { |
---|
2036 | 2124 | DRM_ERROR("Trying to move memory with ring turned off.\n"); |
---|
2037 | 2125 | return -EINVAL; |
---|
2038 | 2126 | } |
---|
2039 | 2127 | |
---|
2040 | 2128 | max_bytes = adev->mman.buffer_funcs->copy_max_bytes; |
---|
2041 | 2129 | num_loops = DIV_ROUND_UP(byte_count, max_bytes); |
---|
2042 | | - num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; |
---|
| 2130 | + num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); |
---|
2043 | 2131 | |
---|
2044 | | - /* for IB padding */ |
---|
2045 | | - while (num_dw & 0x7) |
---|
2046 | | - num_dw++; |
---|
2047 | | - |
---|
2048 | | - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); |
---|
| 2132 | + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job); |
---|
2049 | 2133 | if (r) |
---|
2050 | 2134 | return r; |
---|
2051 | 2135 | |
---|
2052 | | - job->vm_needs_flush = vm_needs_flush; |
---|
| 2136 | + if (vm_needs_flush) { |
---|
| 2137 | + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); |
---|
| 2138 | + job->vm_needs_flush = true; |
---|
| 2139 | + } |
---|
2053 | 2140 | if (resv) { |
---|
2054 | 2141 | r = amdgpu_sync_resv(adev, &job->sync, resv, |
---|
2055 | | - AMDGPU_FENCE_OWNER_UNDEFINED, |
---|
2056 | | - false); |
---|
| 2142 | + AMDGPU_SYNC_ALWAYS, |
---|
| 2143 | + AMDGPU_FENCE_OWNER_UNDEFINED); |
---|
2057 | 2144 | if (r) { |
---|
2058 | 2145 | DRM_ERROR("sync failed (%d).\n", r); |
---|
2059 | 2146 | goto error_free; |
---|
.. | .. |
---|
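
Earlier in this hunk, the `while (num_dw & 0x7) num_dw++` padding loop became ALIGN(..., 8); both round the dword count up to the next multiple of 8 so the IB can be padded for the ring. A tiny equivalence check with made-up values:

    #include <stdio.h>

    #define ALIGN8(x) (((x) + 7u) & ~7u)

    int main(void)
    {
        unsigned num_loops = 3, copy_num_dw = 7;    /* made-up values */
        unsigned num_dw = num_loops * copy_num_dw;  /* 21 */

        unsigned looped = num_dw;
        while (looped & 0x7)                        /* old style */
            looped++;

        printf("ALIGN=%u loop=%u\n", ALIGN8(num_dw), looped); /* 24 24 */
        return 0;
    }
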
2064 | 2151 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
---|
2065 | 2152 | |
---|
2066 | 2153 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, |
---|
2067 | | - dst_offset, cur_size_in_bytes); |
---|
| 2154 | + dst_offset, cur_size_in_bytes, tmz); |
---|
2068 | 2155 | |
---|
2069 | 2156 | src_offset += cur_size_in_bytes; |
---|
2070 | 2157 | dst_offset += cur_size_in_bytes; |
---|
.. | .. |
---|
2091 | 2178 | |
---|
2092 | 2179 | int amdgpu_fill_buffer(struct amdgpu_bo *bo, |
---|
2093 | 2180 | uint32_t src_data, |
---|
2094 | | - struct reservation_object *resv, |
---|
| 2181 | + struct dma_resv *resv, |
---|
2095 | 2182 | struct dma_fence **fence) |
---|
2096 | 2183 | { |
---|
2097 | 2184 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
---|
.. | .. |
---|
2120 | 2207 | mm_node = bo->tbo.mem.mm_node; |
---|
2121 | 2208 | num_loops = 0; |
---|
2122 | 2209 | while (num_pages) { |
---|
2123 | | - uint32_t byte_count = mm_node->size << PAGE_SHIFT; |
---|
| 2210 | + uint64_t byte_count = mm_node->size << PAGE_SHIFT; |
---|
2124 | 2211 | |
---|
2125 | | - num_loops += DIV_ROUND_UP(byte_count, max_bytes); |
---|
| 2212 | + num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes); |
---|
2126 | 2213 | num_pages -= mm_node->size; |
---|
2127 | 2214 | ++mm_node; |
---|
2128 | 2215 | } |
---|
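
Widening byte_count to uint64_t matters because mm_node->size is a page count: one contiguous node of 0x100000 pages (4 GiB with 4 KiB pages) shifted by PAGE_SHIFT no longer fits in 32 bits. A self-contained demonstration of the truncation the change avoids:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        uint64_t node_pages = 0x100000; /* 4 GiB worth of 4 KiB pages */

        uint32_t narrow = (uint32_t)(node_pages << PAGE_SHIFT); /* 0 */
        uint64_t wide = node_pages << PAGE_SHIFT;   /* 0x100000000 */

        printf("32-bit: 0x%x, 64-bit: 0x%llx\n",
               narrow, (unsigned long long)wide);
        return 0;
    }
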
.. | .. |
---|
2131 | 2218 | /* for IB padding */ |
---|
2132 | 2219 | num_dw += 64; |
---|
2133 | 2220 | |
---|
2134 | | - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); |
---|
| 2221 | + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, |
---|
| 2222 | + &job); |
---|
2135 | 2223 | if (r) |
---|
2136 | 2224 | return r; |
---|
2137 | 2225 | |
---|
2138 | 2226 | if (resv) { |
---|
2139 | 2227 | r = amdgpu_sync_resv(adev, &job->sync, resv, |
---|
2140 | | - AMDGPU_FENCE_OWNER_UNDEFINED, false); |
---|
| 2228 | + AMDGPU_SYNC_ALWAYS, |
---|
| 2229 | + AMDGPU_FENCE_OWNER_UNDEFINED); |
---|
2141 | 2230 | if (r) { |
---|
2142 | 2231 | DRM_ERROR("sync failed (%d).\n", r); |
---|
2143 | 2232 | goto error_free; |
---|
.. | .. |
---|
2148 | 2237 | mm_node = bo->tbo.mem.mm_node; |
---|
2149 | 2238 | |
---|
2150 | 2239 | while (num_pages) { |
---|
2151 | | - uint32_t byte_count = mm_node->size << PAGE_SHIFT; |
---|
| 2240 | + uint64_t byte_count = mm_node->size << PAGE_SHIFT; |
---|
2152 | 2241 | uint64_t dst_addr; |
---|
2153 | 2242 | |
---|
2154 | 2243 | dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); |
---|
2155 | 2244 | while (byte_count) { |
---|
2156 | | - uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
---|
| 2245 | + uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count, |
---|
| 2246 | + max_bytes); |
---|
2157 | 2247 | |
---|
2158 | 2248 | amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, |
---|
2159 | 2249 | dst_addr, cur_size_in_bytes); |
---|
.. | .. |
---|
2185 | 2275 | static int amdgpu_mm_dump_table(struct seq_file *m, void *data) |
---|
2186 | 2276 | { |
---|
2187 | 2277 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
---|
2188 | | - unsigned ttm_pl = *(int *)node->info_ent->data; |
---|
| 2278 | + unsigned ttm_pl = (uintptr_t)node->info_ent->data; |
---|
2189 | 2279 | struct drm_device *dev = node->minor->dev; |
---|
2190 | | - struct amdgpu_device *adev = dev->dev_private; |
---|
2191 | | - struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; |
---|
| 2280 | + struct amdgpu_device *adev = drm_to_adev(dev); |
---|
| 2281 | + struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, ttm_pl); |
---|
2192 | 2282 | struct drm_printer p = drm_seq_file_printer(m); |
---|
2193 | 2283 | |
---|
2194 | 2284 | man->func->debug(man, &p); |
---|
2195 | 2285 | return 0; |
---|
2196 | 2286 | } |
---|
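
The placement id is now stored directly in the info_ent->data pointer (see the table below) and recovered above with a uintptr_t cast, dropping the static int indirection the old table used. The round-trip in isolation:

    #include <stdint.h>
    #include <stdio.h>

    /* Stash a small enum value in a void * slot and read it back.
     * Safe because any enum value fits in uintptr_t. */
    int main(void)
    {
        unsigned ttm_pl = 2;    /* illustrative placement id */
        void *slot = (void *)(uintptr_t)ttm_pl;

        printf("%u\n", (unsigned)(uintptr_t)slot);  /* 2 */
        return 0;
    }
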
2197 | 2287 | |
---|
2198 | | -static int ttm_pl_vram = TTM_PL_VRAM; |
---|
2199 | | -static int ttm_pl_tt = TTM_PL_TT; |
---|
2200 | | - |
---|
2201 | 2288 | static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { |
---|
2202 | | - {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, |
---|
2203 | | - {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt}, |
---|
| 2289 | + {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM}, |
---|
| 2290 | + {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT}, |
---|
| 2291 | + {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS}, |
---|
| 2292 | + {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS}, |
---|
| 2293 | + {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA}, |
---|
2204 | 2294 | {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, |
---|
2205 | 2295 | #ifdef CONFIG_SWIOTLB |
---|
2206 | 2296 | {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} |
---|
.. | .. |
---|
2217 | 2307 | { |
---|
2218 | 2308 | struct amdgpu_device *adev = file_inode(f)->i_private; |
---|
2219 | 2309 | ssize_t result = 0; |
---|
2220 | | - int r; |
---|
2221 | 2310 | |
---|
2222 | 2311 | if (size & 0x3 || *pos & 0x3) |
---|
2223 | 2312 | return -EINVAL; |
---|
.. | .. |
---|
2225 | 2314 | if (*pos >= adev->gmc.mc_vram_size) |
---|
2226 | 2315 | return -ENXIO; |
---|
2227 | 2316 | |
---|
| 2317 | + size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos)); |
---|
2228 | 2318 | while (size) { |
---|
2229 | | - unsigned long flags; |
---|
2230 | | - uint32_t value; |
---|
| 2319 | + size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4); |
---|
| 2320 | + uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ]; |
---|
2231 | 2321 | |
---|
2232 | | - if (*pos >= adev->gmc.mc_vram_size) |
---|
2233 | | - return result; |
---|
| 2322 | + amdgpu_device_vram_access(adev, *pos, value, bytes, false); |
---|
| 2323 | + if (copy_to_user(buf, value, bytes)) |
---|
| 2324 | + return -EFAULT; |
---|
2234 | 2325 | |
---|
2235 | | - spin_lock_irqsave(&adev->mmio_idx_lock, flags); |
---|
2236 | | - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); |
---|
2237 | | - WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); |
---|
2238 | | - value = RREG32_NO_KIQ(mmMM_DATA); |
---|
2239 | | - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); |
---|
2240 | | - |
---|
2241 | | - r = put_user(value, (uint32_t *)buf); |
---|
2242 | | - if (r) |
---|
2243 | | - return r; |
---|
2244 | | - |
---|
2245 | | - result += 4; |
---|
2246 | | - buf += 4; |
---|
2247 | | - *pos += 4; |
---|
2248 | | - size -= 4; |
---|
| 2326 | + result += bytes; |
---|
| 2327 | + buf += bytes; |
---|
| 2328 | + *pos += bytes; |
---|
| 2329 | + size -= bytes; |
---|
2249 | 2330 | } |
---|
2250 | 2331 | |
---|
2251 | 2332 | return result; |
---|
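
The rewritten read loop above copies up to AMDGPU_TTM_VRAM_MAX_DW_READ dwords (512 bytes) per iteration through a stack buffer instead of issuing one MMIO-backed dword read at a time. The clamping arithmetic, with made-up numbers (a 4 KiB request with only 768 bytes left before the end of VRAM):

    #include <stddef.h>
    #include <stdio.h>

    #define MAX_DW_READ ((size_t)128)

    static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

    int main(void)
    {
        size_t size = 0x1000;       /* requested bytes (illustrative) */
        size_t vram_left = 0x300;   /* bytes before end of VRAM */
        size_t chunks = 0;

        size = min_sz(size, vram_left); /* clamp as the driver does */

        while (size) {
            size_t bytes = min_sz(size, MAX_DW_READ * 4); /* <= 512 */
            size -= bytes;
            chunks++;
        }
        printf("%zu chunks\n", chunks); /* 2: 512 + 256 bytes */
        return 0;
    }
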
.. | .. |
---|
2482 | 2563 | |
---|
2483 | 2564 | #endif |
---|
2484 | 2565 | |
---|
2485 | | -static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) |
---|
| 2566 | +int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) |
---|
2486 | 2567 | { |
---|
2487 | 2568 | #if defined(CONFIG_DEBUG_FS) |
---|
2488 | 2569 | unsigned count; |
---|
2489 | 2570 | |
---|
2490 | | - struct drm_minor *minor = adev->ddev->primary; |
---|
| 2571 | + struct drm_minor *minor = adev_to_drm(adev)->primary; |
---|
2491 | 2572 | struct dentry *ent, *root = minor->debugfs_root; |
---|
2492 | 2573 | |
---|
2493 | 2574 | for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) { |
---|
.. | .. |
---|
2515 | 2596 | return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); |
---|
2516 | 2597 | #else |
---|
2517 | 2598 | return 0; |
---|
2518 | | -#endif |
---|
2519 | | -} |
---|
2520 | | - |
---|
2521 | | -static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) |
---|
2522 | | -{ |
---|
2523 | | -#if defined(CONFIG_DEBUG_FS) |
---|
2524 | | - unsigned i; |
---|
2525 | | - |
---|
2526 | | - for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++) |
---|
2527 | | - debugfs_remove(adev->mman.debugfs_entries[i]); |
---|
2528 | 2599 | #endif |
---|
2529 | 2600 | } |
---|