@@ -19 +19 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
+#include <linux/dma-buf.h>
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
-#include <drm/drmP.h>
+#include <linux/sched/task.h>
+
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_amdkfd.h"
-
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
- * a HW bug.
- */
-#define VI_BO_SIZE_ALIGN (0x8000)
+#include "amdgpu_dma_buf.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -46 +42 @@
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
-	uint64_t max_userptr_mem_limit;
+	uint64_t max_ttm_mem_limit;
 	int64_t system_mem_used;
-	int64_t userptr_mem_used;
+	int64_t ttm_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
-
-/* Struct used for amdgpu_amdkfd_bo_validate */
-struct amdgpu_vm_parser {
-	uint32_t domain;
-	bool wait;
-};
 
 static const char * const domain_bit_to_string[] = {
 	"CPU",
@@ -90 +80 @@
 }
 
 /* Set memory usage limits. Current, limits are
- * System (kernel) memory - 3/8th System RAM
- * Userptr memory - 3/4th System RAM
+ * System (TTM + userptr) memory - 15/16th System RAM
+ * TTM memory - 3/8th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -103 +93 @@
 	mem *= si.mem_unit;
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
-	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+	kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 		(kfd_mem_limit.max_system_mem_limit >> 20),
-		(kfd_mem_limit.max_userptr_mem_limit >> 20));
+		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain)
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
+static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+		uint64_t size, u32 domain, bool sg)
 {
-	size_t acc_size;
+	uint64_t reserved_for_pt =
+		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
 	int ret = 0;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 				       sizeof(struct amdgpu_bo));
 
-	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	vram_needed = 0;
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
-		    kfd_mem_limit.max_system_mem_limit) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
-		if ((kfd_mem_limit.system_mem_used + acc_size >
-			kfd_mem_limit.max_system_mem_limit) ||
-			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
-			kfd_mem_limit.max_userptr_mem_limit)) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += acc_size;
-		kfd_mem_limit.userptr_mem_used += size;
+		/* TTM GTT memory */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size + size;
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		/* Userptr */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size;
+	} else {
+		/* VRAM and SG */
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
+		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+			vram_needed = size;
 	}
-err_no_mem:
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+	if (kfd_mem_limit.system_mem_used + system_mem_needed >
+	    kfd_mem_limit.max_system_mem_limit)
+		pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+
+	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+	     kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
+	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+	     kfd_mem_limit.max_ttm_mem_limit) ||
+	    (adev->kfd.vram_used + vram_needed >
+	     adev->gmc.real_vram_size - reserved_for_pt)) {
+		ret = -ENOMEM;
+	} else {
+		kfd_mem_limit.system_mem_used += system_mem_needed;
+		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+		adev->kfd.vram_used += vram_needed;
+	}
+
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
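As a quick illustration of the new accounting above (an annotation on the patch, not part of it): assuming a machine with 16 GiB of system RAM and 16 GiB of total GPU memory feeding ESTIMATE_PT_SIZE(), the limits work out to roughly 15 GiB of system memory, 6 GiB of TTM memory, and a 1 MiB page-table reservation. A standalone user-space sketch of the same shift arithmetic:

/* Illustrative only: the 16 GiB figures are assumed example values,
 * not taken from the patch.
 */
#include <stdio.h>
#include <stdint.h>

#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)

int main(void)
{
	uint64_t mem = 16ULL << 30;		/* assumed system RAM */
	uint64_t total_gpu_mem = 16ULL << 30;	/* assumed total GPU memory for the PT estimate */

	uint64_t max_system = mem - (mem >> 4);		/* 15/16 of RAM -> 15360 MiB */
	uint64_t max_ttm = (mem >> 1) - (mem >> 3);	/* 3/8 of RAM   ->  6144 MiB */
	uint64_t reserved_pt = ESTIMATE_PT_SIZE(total_gpu_mem);	/* 1/16384 -> 1 MiB */

	printf("system limit %llu MiB, TTM limit %llu MiB, PT reserve %llu MiB\n",
	       (unsigned long long)(max_system >> 20),
	       (unsigned long long)(max_ttm >> 20),
	       (unsigned long long)(reserved_pt >> 20));
	return 0;
}
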
---|
-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
-		uint64_t size, u32 domain)
+static void unreserve_mem_limit(struct amdgpu_device *adev,
+		uint64_t size, u32 domain, bool sg)
 {
 	size_t acc_size;
 
@@ -154 +174 @@
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		kfd_mem_limit.system_mem_used -= (acc_size + size);
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+	} else {
 		kfd_mem_limit.system_mem_used -= acc_size;
-		kfd_mem_limit.userptr_mem_used -= size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+			adev->kfd.vram_used -= size;
+			WARN_ONCE(adev->kfd.vram_used < 0,
+				  "kfd VRAM memory accounting unbalanced");
+		}
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 {
-	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	u32 domain = bo->preferred_domains;
+	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
 	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
-		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
-		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
-	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
-		kfd_mem_limit.system_mem_used -=
-			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		domain = AMDGPU_GEM_DOMAIN_CPU;
+		sg = false;
 	}
-	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
 
-	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
 }
 
 
-/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  * reservation object.
  *
  * @bo: [IN] Remove eviction fence(s) from this BO
- * @ef: [IN] If ef is specified, then this eviction fence is removed if it
+ * @ef: [IN] This eviction fence is removed if it
  *	is present in the shared list.
- * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
- *	from BO's reservation object shared list.
- * @ef_count: [OUT] Number of fences in ef_list.
  *
- * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
- * called to restore the eviction fences and to avoid memory leak. This is
- * useful for shared BOs.
  * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
  */
 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
-					struct amdgpu_amdkfd_fence *ef,
-					struct amdgpu_amdkfd_fence ***ef_list,
-					unsigned int *ef_count)
+					struct amdgpu_amdkfd_fence *ef)
 {
-	struct reservation_object *resv = bo->tbo.resv;
-	struct reservation_object_list *old, *new;
+	struct dma_resv *resv = bo->tbo.base.resv;
+	struct dma_resv_list *old, *new;
 	unsigned int i, j, k;
 
-	if (!ef && !ef_list)
+	if (!ef)
 		return -EINVAL;
 
-	if (ef_list) {
-		*ef_list = NULL;
-		*ef_count = 0;
-	}
-
-	old = reservation_object_get_list(resv);
+	old = dma_resv_get_list(resv);
 	if (!old)
 		return 0;
 
@@ -234 +245 @@
 		struct dma_fence *f;
 
 		f = rcu_dereference_protected(old->shared[i],
-					      reservation_object_held(resv));
+					      dma_resv_held(resv));
 
-		if ((ef && f->context == ef->base.context) ||
-		    (!ef && to_amdgpu_amdkfd_fence(f)))
+		if (f->context == ef->base.context)
 			RCU_INIT_POINTER(new->shared[--j], f);
 		else
 			RCU_INIT_POINTER(new->shared[k++], f);
@@ -245 +255 @@
 	new->shared_max = old->shared_max;
 	new->shared_count = k;
 
-	if (!ef) {
-		unsigned int count = old->shared_count - j;
-
-		/* Alloc memory for count number of eviction fence pointers.
-		 * Fill the ef_list array and ef_count
-		 */
-		*ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
-		*ef_count = count;
-
-		if (!*ef_list) {
-			kfree(new);
-			return -ENOMEM;
-		}
-	}
-
 	/* Install the new fence list, seqcount provides the barriers */
-	preempt_disable();
 	write_seqcount_begin(&resv->seq);
 	RCU_INIT_POINTER(resv->fence, new);
 	write_seqcount_end(&resv->seq);
-	preempt_enable();
 
 	/* Drop the references to the removed fences or move them to ef_list */
 	for (i = j, k = 0; i < old->shared_count; ++i) {
 		struct dma_fence *f;
 
 		f = rcu_dereference_protected(new->shared[i],
-					      reservation_object_held(resv));
-		if (!ef)
-			(*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
-		else
-			dma_fence_put(f);
+					      dma_resv_held(resv));
+		dma_fence_put(f);
 	}
 	kfree_rcu(old, rcu);
 
 	return 0;
 }
 
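For readers following the hunk above: the partition loop fills the replacement shared list from both ends. Fences to keep are packed from index 0 via k++, while fences belonging to the eviction fence's context are parked at the tail via --j, so the cleanup loop only has to walk indices [j, shared_count). A minimal user-space sketch of that two-ended partition (illustrative only; plain integers stand in for fences, and there is no RCU here):

#include <stdio.h>

int main(void)
{
	int fences[6] = { 1, 7, 2, 7, 3, 7 };	/* 7 stands in for the context being removed */
	int part[6];
	unsigned int i, j = 6, k = 0;

	for (i = 0; i < 6; ++i) {
		if (fences[i] == 7)
			part[--j] = fences[i];	/* to drop: packed from the back */
		else
			part[k++] = fences[i];	/* to keep: packed from the front */
	}

	printf("kept %u (first %d), dropping %u starting at index %u (value %d)\n",
	       k, part[0], 6 - j, j, part[j]);
	return 0;
}
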
---|
-/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
- * reservation object.
- *
- * @bo: [IN] Add eviction fences to this BO
- * @ef_list: [IN] List of eviction fences to be added
- * @ef_count: [IN] Number of fences in ef_list.
- *
- * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
- * function.
- */
-static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
-					struct amdgpu_amdkfd_fence **ef_list,
-					unsigned int ef_count)
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
 {
-	int i;
+	struct amdgpu_bo *root = bo;
+	struct amdgpu_vm_bo_base *vm_bo;
+	struct amdgpu_vm *vm;
+	struct amdkfd_process_info *info;
+	struct amdgpu_amdkfd_fence *ef;
+	int ret;
 
-	if (!ef_list || !ef_count)
-		return;
+	/* we can always get vm_bo from root PD bo.*/
+	while (root->parent)
+		root = root->parent;
 
-	for (i = 0; i < ef_count; i++) {
-		amdgpu_bo_fence(bo, &ef_list[i]->base, true);
-		/* Re-adding the fence takes an additional reference. Drop that
-		 * reference.
-		 */
-		dma_fence_put(&ef_list[i]->base);
-	}
+	vm_bo = root->vm_bo;
+	if (!vm_bo)
+		return 0;
 
-	kfree(ef_list);
+	vm = vm_bo->vm;
+	if (!vm)
+		return 0;
+
+	info = vm->process_info;
+	if (!info || !info->eviction_fence)
+		return 0;
+
+	ef = container_of(dma_fence_get(&info->eviction_fence->base),
+			struct amdgpu_amdkfd_fence, base);
+
+	BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
+	ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+	dma_resv_unlock(bo->tbo.base.resv);
+
+	dma_fence_put(&ef->base);
+	return ret;
 }
 
 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -328 +324 @@
 	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 	if (ret)
 		goto validate_fail;
-	if (wait) {
-		struct amdgpu_amdkfd_fence **ef_list;
-		unsigned int ef_count;
-
-		ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
-							  &ef_count);
-		if (ret)
-			goto validate_fail;
-
-		ttm_bo_wait(&bo->tbo, false, false);
-		amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
-	}
+	if (wait)
+		amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
 
 validate_fail:
 	return ret;
 }
 
-static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
 {
-	struct amdgpu_vm_parser *p = param;
-
-	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
+	return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
 }
 
 /* vm_validate_pt_pd_bos - Validate page table and directory BOs
@@ -363 +347 @@
 {
 	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
-	struct amdgpu_vm_parser param;
-	uint64_t addr, flags = AMDGPU_PTE_VALID;
 	int ret;
 
-	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
-	param.wait = false;
-
-	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
-					&param);
+	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
 	if (ret) {
-		pr_err("amdgpu: failed to validate PT BOs\n");
+		pr_err("failed to validate PT BOs\n");
 		return ret;
 	}
 
-	ret = amdgpu_amdkfd_validate(&param, pd);
+	ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
 	if (ret) {
-		pr_err("amdgpu: failed to validate PD\n");
+		pr_err("failed to validate PD\n");
 		return ret;
 	}
 
-	addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
-	vm->pd_phys_addr = addr;
+	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 
 	if (vm->use_cpu_for_update) {
 		ret = amdgpu_bo_kmap(pd, NULL);
 		if (ret) {
-			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
+			pr_err("failed to kmap PD, ret=%d\n", ret);
 			return ret;
 		}
 	}
 
 	return 0;
-}
-
-static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-			 struct dma_fence *f)
-{
-	int ret = amdgpu_sync_fence(adev, sync, f, false);
-
-	/* Sync objects can't handle multiple GPUs (contexts) updating
-	 * sync->last_vm_update. Fortunately we don't need it for
-	 * KFD's purposes, so we can just drop that fence.
-	 */
-	if (sync->last_vm_update) {
-		dma_fence_put(sync->last_vm_update);
-		sync->last_vm_update = NULL;
-	}
-
-	return ret;
 }
 
 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
@@ -421 +380 @@
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	int ret;
 
-	ret = amdgpu_vm_update_directories(adev, vm);
+	ret = amdgpu_vm_update_pdes(adev, vm, false);
 	if (ret)
 		return ret;
 
-	return sync_vm_fence(adev, sync, vm->last_update);
+	return amdgpu_sync_fence(sync, vm->last_update);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
+	uint32_t mapping_flags;
+
+	mapping_flags = AMDGPU_VM_PAGE_READABLE;
+	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+	switch (adev->asic_type) {
+	case CHIP_ARCTURUS:
+		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+			if (bo_adev == adev)
+				mapping_flags |= coherent ?
+					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+			else
+				mapping_flags |= AMDGPU_VM_MTYPE_UC;
+		} else {
+			mapping_flags |= coherent ?
+				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+		}
+		break;
+	default:
+		mapping_flags |= coherent ?
+			AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+	}
+
+	return amdgpu_gem_va_map_flags(adev, mapping_flags);
 }
 
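The MTYPE selection in get_pte_flags() above reduces to a small decision table: on Arcturus, VRAM local to the mapping GPU gets CC (coherent) or RW, VRAM on a peer GPU gets UC, and everything else (GTT, userptr, other ASICs) gets UC when coherent and NC otherwise. A standalone sketch of just that decision, using made-up enum values rather than the real AMDGPU_VM_MTYPE_* bits:

#include <stdio.h>
#include <stdbool.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };	/* stand-ins, not the real flag bits */

static enum mtype pick_mtype(bool arcturus, bool vram, bool local, bool coherent)
{
	if (arcturus && vram)
		return local ? (coherent ? MTYPE_CC : MTYPE_RW) : MTYPE_UC;
	return coherent ? MTYPE_UC : MTYPE_NC;
}

int main(void)
{
	static const char * const name[] = { "NC", "UC", "CC", "RW" };

	/* Arcturus, VRAM local to the mapping GPU, non-coherent -> RW */
	printf("local VRAM on Arcturus: %s\n", name[pick_mtype(true, true, true, false)]);
	/* Arcturus, VRAM on a peer GPU -> UC */
	printf("peer VRAM on Arcturus:  %s\n", name[pick_mtype(true, true, false, false)]);
	/* Any other ASIC, or GTT/userptr memory, non-coherent -> NC */
	printf("default, non-coherent:  %s\n", name[pick_mtype(false, false, false, false)]);
	return 0;
}
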
---|
 /* add_bo_to_vm - Add a BO to a VM
@@ -446 +438 @@
 {
 	int ret;
 	struct kfd_bo_va_list *bo_va_entry;
-	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_bo *bo = mem->bo;
 	uint64_t va = mem->va;
 	struct list_head *list_bo_va = &mem->bo_va_list;
@@ -477 +468 @@
 	}
 
 	bo_va_entry->va = va;
-	bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
-							 mem->mapping_flags);
+	bo_va_entry->pte_flags = get_pte_flags(adev, mem);
 	bo_va_entry->kgd_dev = (void *)adev;
 	list_add(&bo_va_entry->bo_list, list_bo_va);
 
 	if (p_bo_va_entry)
 		*p_bo_va_entry = bo_va_entry;
 
-	/* Allocate new page tables if needed and validate
-	 * them. Clearing of new page tables and validate need to wait
-	 * on move fences. We don't want that to trigger the eviction
-	 * fence, so remove it temporarily.
-	 */
-	amdgpu_amdkfd_remove_eviction_fence(pd,
-					vm->process_info->eviction_fence,
-					NULL, NULL);
-
-	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
-	if (ret) {
-		pr_err("Failed to allocate pts, err=%d\n", ret);
-		goto err_alloc_pts;
-	}
-
+	/* Allocate validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
 	if (ret) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto err_alloc_pts;
 	}
 
-	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
-
 	return 0;
 
 err_alloc_pts:
-	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
 	list_del(&bo_va_entry->bo_list);
 err_vmadd:
@@ -539 +511 @@
 	struct amdgpu_bo *bo = mem->bo;
 
 	INIT_LIST_HEAD(&entry->head);
-	entry->shared = true;
+	entry->num_shared = 1;
 	entry->bo = &bo->tbo;
 	mutex_lock(&process_info->lock);
 	if (userptr)
 		list_add_tail(&entry->head, &process_info->userptr_valid_list);
 	else
 		list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	mutex_unlock(&process_info->lock);
+}
+
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+		struct amdkfd_process_info *process_info)
+{
+	struct ttm_validate_buffer *bo_list_entry;
+
+	bo_list_entry = &mem->validate_list;
+	mutex_lock(&process_info->lock);
+	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 }
 
@@ -561 +544 @@
  *
  * Returns 0 for success, negative errno for errors.
  */
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
-			   uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	struct amdgpu_bo *bo = mem->bo;
@@ -571 +553 @@
 
 	mutex_lock(&process_info->lock);
 
-	ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+	ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
 	if (ret) {
 		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
 		goto out;
@@ -584 +566 @@
 		goto out;
 	}
 
-	/* If no restore worker is running concurrently, user_pages
-	 * should not be allocated
-	 */
-	WARN(mem->user_pages, "Leaking user_pages array");
-
-	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					   sizeof(struct page *),
-					   GFP_KERNEL | __GFP_ZERO);
-	if (!mem->user_pages) {
-		pr_err("%s: Failed to allocate pages array\n", __func__);
-		ret = -ENOMEM;
-		goto unregister_out;
-	}
-
-	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
 	if (ret) {
 		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-		goto free_out;
+		goto unregister_out;
 	}
-
-	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
 
 	ret = amdgpu_bo_reserve(bo, true);
 	if (ret) {
@@ -618 +584 @@
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-	kvfree(mem->user_pages);
-	mem->user_pages = NULL;
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 unregister_out:
 	if (ret)
 		amdgpu_mn_unregister(bo);
@@ -678 +640 @@
 	if (!ctx->vm_pd)
 		return -ENOMEM;
 
-	ctx->kfd_bo.robj = bo;
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
-	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
+	ctx->kfd_bo.tv.num_shared = 1;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
 				     false, &ctx->duplicates);
-	if (!ret)
-		ctx->reserved = true;
-	else {
-		pr_err("Failed to reserve buffers in ttm\n");
+	if (ret) {
+		pr_err("Failed to reserve buffers in ttm.\n");
 		kfree(ctx->vm_pd);
 		ctx->vm_pd = NULL;
+		return ret;
 	}
 
-	return ret;
+	ctx->reserved = true;
+	return 0;
 }
 
 /**
@@ -743 +703 @@
 		return -ENOMEM;
 	}
 
-	ctx->kfd_bo.robj = bo;
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
-	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
+	ctx->kfd_bo.tv.num_shared = 1;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -764 +722 @@
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
 				     false, &ctx->duplicates);
-	if (!ret)
-		ctx->reserved = true;
-	else
-		pr_err("Failed to reserve buffers in ttm.\n");
-
 	if (ret) {
+		pr_err("Failed to reserve buffers in ttm.\n");
 		kfree(ctx->vm_pd);
 		ctx->vm_pd = NULL;
+		return ret;
 	}
 
-	return ret;
+	ctx->reserved = true;
+	return 0;
 }
 
---|
 /**
@@ -813 +769 @@
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
 	struct amdgpu_vm *vm = bo_va->base.vm;
-	struct amdgpu_bo *pd = vm->root.base.bo;
 
-	/* Remove eviction fence from PD (and thereby from PTs too as
-	 * they share the resv. object). Otherwise during PT update
-	 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
-	 * get added to job->sync object and job execution would
-	 * trigger the eviction fence.
-	 */
-	amdgpu_amdkfd_remove_eviction_fence(pd,
-					vm->process_info->eviction_fence,
-					NULL, NULL);
 	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
 
 	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
-
-	sync_vm_fence(adev, sync, bo_va->last_pt_update);
+	amdgpu_sync_fence(sync, bo_va->last_pt_update);
 
 	return 0;
 }
@@ -841 +784 @@
 			struct amdgpu_sync *sync)
 {
 	int ret;
-	struct amdgpu_vm *vm;
-	struct amdgpu_bo_va *bo_va;
-	struct amdgpu_bo *bo;
-
-	bo_va = entry->bo_va;
-	vm = bo_va->base.vm;
-	bo = bo_va->base.bo;
+	struct amdgpu_bo_va *bo_va = entry->bo_va;
 
 	/* Update the page tables */
 	ret = amdgpu_vm_bo_update(adev, bo_va, false);
@@ -856 +793 @@
 		return ret;
 	}
 
-	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
+	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -891 +828 @@
 	return ret;
 }
 
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
+{
+	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+	if (!sg)
+		return NULL;
+	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+		kfree(sg);
+		return NULL;
+	}
+	sg->sgl->dma_address = addr;
+	sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	sg->sgl->dma_length = size;
+#endif
+	return sg;
+}
+
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
 	struct amdgpu_vm *peer_vm;
@@ -899 +854 @@
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
 		ret = vm_validate_pt_pd_bos(peer_vm);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
+				 struct amdgpu_sync *sync)
+{
+	struct amdgpu_vm *peer_vm;
+	int ret;
+
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		struct amdgpu_bo *pd = peer_vm->root.base.bo;
+
+		ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
+				       AMDGPU_SYNC_NE_OWNER,
+				       AMDGPU_FENCE_OWNER_KFD);
 		if (ret)
 			return ret;
 	}
@@ -968 +943 @@
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto validate_pd_fail;
 	}
-	ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
+	ret = amdgpu_bo_sync_wait(vm->root.base.bo,
+				  AMDGPU_FENCE_OWNER_KFD, false);
 	if (ret)
 		goto wait_pd_fail;
+	ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
+	if (ret)
+		goto reserve_shared_fail;
 	amdgpu_bo_fence(vm->root.base.bo,
 			&vm->process_info->eviction_fence->base, true);
 	amdgpu_bo_unreserve(vm->root.base.bo);
@@ -984 +963 @@
 
 	return 0;
 
+reserve_shared_fail:
 wait_pd_fail:
 validate_pd_fail:
 	amdgpu_bo_unreserve(vm->root.base.bo);
@@ -1003 +983 @@
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
+					  void **vm, void **process_info,
 					  struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -1016 +996 @@
 		return -ENOMEM;
 
 	/* Initialize AMDGPU part of the VM */
-	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
 	if (ret) {
 		pr_err("Failed init vm ret %d\n", ret);
 		goto amdgpu_vm_init_fail;
@@ -1039 +1019 @@
 }
 
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					   struct file *filp,
+					   struct file *filp, u32 pasid,
 					   void **vm, void **process_info,
 					   struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct drm_file *drm_priv = filp->private_data;
-	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
-	struct amdgpu_vm *avm = &drv_priv->vm;
+	struct amdgpu_fpriv *drv_priv;
+	struct amdgpu_vm *avm;
 	int ret;
+
+	ret = amdgpu_file_to_fpriv(filp, &drv_priv);
+	if (ret)
+		return ret;
+	avm = &drv_priv->vm;
 
 	/* Already a compute VM? */
 	if (avm->process_info)
 		return -EINVAL;
 
 	/* Convert VM into a compute VM */
-	ret = amdgpu_vm_make_compute(adev, avm);
+	ret = amdgpu_vm_make_compute(adev, avm, pasid);
 	if (ret)
 		return ret;
 
@@ -1088 +1072 @@
 	list_del(&vm->vm_list_node);
 	mutex_unlock(&process_info->lock);
 
+	vm->process_info = NULL;
+
 	/* Release per-process resources when last compute VM is destroyed */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
@@ -1117 +1103 @@
 	kfree(vm);
 }
 
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (WARN_ON(!kgd || !vm))
+		return;
+
+	pr_debug("Releasing process vm %p\n", vm);
+
+	/* The original pasid of amdgpu vm has already been
+	 * released during making a amdgpu vm to a compute vm
+	 * The current pasid is managed by kfd and will be
+	 * released on kfd process destroy. Set amdgpu pasid
+	 * to 0 to avoid duplicate release.
+	 */
+	amdgpu_vm_release_compute(adev, avm);
+}
+
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 {
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_bo *pd = avm->root.base.bo;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 
-	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+	if (adev->asic_type < CHIP_VEGA10)
+		return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+	return avm->pd_phys_addr;
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
---|
@@ -1131 +1140 @@
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	enum ttm_bo_type bo_type = ttm_bo_type_device;
+	struct sg_table *sg = NULL;
 	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
-	int byte_align;
 	u32 domain, alloc_domain;
 	u64 alloc_flags;
-	uint32_t mapping_flags;
 	int ret;
 
 	/*
 	 * Check on which domain to allocate BO
 	 */
-	if (flags & ALLOC_MEM_FLAGS_VRAM) {
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
-		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
-		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+		alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+		alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
 			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
+	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
 		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_flags = 0;
-	} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 		alloc_flags = 0;
 		if (!offset || !*offset)
 			return -EINVAL;
 		user_addr = untagged_addr(*offset);
+	} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+			KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+		domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+		bo_type = ttm_bo_type_sg;
+		alloc_flags = 0;
+		if (size > UINT_MAX)
+			return -EINVAL;
+		sg = create_doorbell_sg(*offset, size);
+		if (!sg)
+			return -ENOMEM;
 	} else {
 		return -EINVAL;
 	}
 
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem)
-		return -ENOMEM;
+	if (!*mem) {
+		ret = -ENOMEM;
+		goto err;
+	}
 	INIT_LIST_HEAD(&(*mem)->bo_va_list);
 	mutex_init(&(*mem)->lock);
-	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+	(*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
 
 	/* Workaround for AQL queue wraparound bug. Map the same
 	 * memory twice. That means we only actually allocate half
@@ -1177 +1199 @@
 	if ((*mem)->aql_queue)
 		size = size >> 1;
 
-	/* Workaround for TLB bug on older VI chips */
-	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
-			adev->asic_type != CHIP_FIJI &&
-			adev->asic_type != CHIP_POLARIS10 &&
-			adev->asic_type != CHIP_POLARIS11) ?
-			VI_BO_SIZE_ALIGN : 1;
-
-	mapping_flags = AMDGPU_VM_PAGE_READABLE;
-	if (flags & ALLOC_MEM_FLAGS_WRITABLE)
-		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
-	if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
-		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
-	if (flags & ALLOC_MEM_FLAGS_COHERENT)
-		mapping_flags |= AMDGPU_VM_MTYPE_UC;
-	else
-		mapping_flags |= AMDGPU_VM_MTYPE_NC;
-	(*mem)->mapping_flags = mapping_flags;
+	(*mem)->alloc_flags = flags;
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
 	if (ret) {
 		pr_debug("Insufficient system memory\n");
-		goto err_reserve_system_mem;
+		goto err_reserve_limit;
 	}
 
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1208 +1214 @@
 
 	memset(&bp, 0, sizeof(bp));
 	bp.size = size;
-	bp.byte_align = byte_align;
+	bp.byte_align = 1;
 	bp.domain = alloc_domain;
 	bp.flags = alloc_flags;
-	bp.type = ttm_bo_type_device;
+	bp.type = bo_type;
 	bp.resv = NULL;
 	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret) {
 		pr_debug("Failed to create BO on domain %s. ret %d\n",
 				domain_string(alloc_domain), ret);
 		goto err_bo_create;
+	}
+	if (bo_type == ttm_bo_type_sg) {
+		bo->tbo.sg = sg;
+		bo->tbo.ttm->sg = sg;
 	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
@@ -1231 +1241 @@
 	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
 	if (user_addr) {
-		ret = init_user_pages(*mem, current->mm, user_addr);
-		if (ret) {
-			mutex_lock(&avm->process_info->lock);
-			list_del(&(*mem)->validate_list.head);
-			mutex_unlock(&avm->process_info->lock);
+		ret = init_user_pages(*mem, user_addr);
+		if (ret)
 			goto allocate_init_user_pages_failed;
-		}
 	}
 
 	if (offset)
@@ -1246 +1252 @@
 	return 0;
 
 allocate_init_user_pages_failed:
+	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
-	goto err_reserve_system_mem;
+	goto err_reserve_limit;
 err_bo_create:
-	unreserve_system_mem_limit(adev, size, alloc_domain);
-err_reserve_system_mem:
+	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
+err:
+	if (sg) {
+		sg_free_table(sg);
+		kfree(sg);
+	}
 	return ret;
 }
 
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem)
+		struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	unsigned long bo_size = mem->bo->tbo.mem.size;
 	struct kfd_bo_va_list *entry, *tmp;
 	struct bo_vm_reservation_context ctx;
 	struct ttm_validate_buffer *bo_list_entry;
+	unsigned int mapped_to_gpu_memory;
 	int ret;
+	bool is_imported = 0;
 
 	mutex_lock(&mem->lock);
-
-	if (mem->mapped_to_gpu_memory > 0) {
-		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
-				mem->va, bo_size);
-		mutex_unlock(&mem->lock);
-		return -EBUSY;
-	}
-
+	mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
+	is_imported = mem->is_imported;
 	mutex_unlock(&mem->lock);
 	/* lock is not needed after this, since mem is unused and will
 	 * be freed anyway
 	 */
 
-	/* No more MMU notifiers */
-	amdgpu_mn_unregister(mem->bo);
+	if (mapped_to_gpu_memory > 0) {
+		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+				mem->va, bo_size);
+		return -EBUSY;
+	}
 
 	/* Make sure restore workers don't access the BO any more */
 	bo_list_entry = &mem->validate_list;
@@ -1290 +1301 @@
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
-	/* Free user pages if necessary */
-	if (mem->user_pages) {
-		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
-		kvfree(mem->user_pages);
-	}
+	/* No more MMU notifiers */
+	amdgpu_mn_unregister(mem->bo);
 
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
@@ -1308 +1313 @@
 	 * attached
 	 */
 	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
-					process_info->eviction_fence,
-					NULL, NULL);
+					process_info->eviction_fence);
 	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
 		mem->va + bo_size * (1 + mem->aql_queue));
 
@@ -1323 +1327 @@
 	/* Free the sync object */
 	amdgpu_sync_free(&mem->sync);
 
+	/* If the SG is not NULL, it's one we created for a doorbell or mmio
+	 * remap BO. We need to free it.
+	 */
+	if (mem->bo->tbo.sg) {
+		sg_free_table(mem->bo->tbo.sg);
+		kfree(mem->bo->tbo.sg);
+	}
+
+	/* Update the size of the BO being freed if it was allocated from
+	 * VRAM and is not imported.
+	 */
+	if (size) {
+		if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
+		    (!is_imported))
+			*size = bo_size;
+		else
+			*size = 0;
+	}
+
 	/* Free the BO*/
-	amdgpu_bo_unref(&mem->bo);
+	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
 	kfree(mem);
 
@@ -1363 +1386 @@
 	 * concurrently and the queues are actually stopped
 	 */
 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-		down_write(&current->mm->mmap_sem);
+		mmap_write_lock(current->mm);
 		is_invalid_userptr = atomic_read(&mem->invalid);
-		up_write(&current->mm->mmap_sem);
+		mmap_write_unlock(current->mm);
 	}
 
 	mutex_lock(&mem->lock);
@@ -1387 +1410 @@
 	 * the queues are still stopped and we can leave mapping for
 	 * the next restore worker
 	 */
-	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
+	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
 		is_invalid_userptr = true;
 
 	if (check_if_add_bo_to_vm(avm, mem)) {
@@ -1429 +1453 @@
 		ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
 				      is_invalid_userptr);
 		if (ret) {
-			pr_err("Failed to map radeon bo to gpuvm\n");
+			pr_err("Failed to map bo to gpuvm\n");
 			goto map_bo_to_gpuvm_failed;
 		}
 
@@ -1527 +1551 @@
 	if (mem->mapped_to_gpu_memory == 0 &&
 	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
 		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
-						process_info->eviction_fence,
-						NULL, NULL);
+						process_info->eviction_fence);
 
 unreserve_out:
 	unreserve_bo_and_vms(&ctx, false, false);
@@ -1589 +1612 @@
 	}
 
 	amdgpu_amdkfd_remove_eviction_fence(
-			bo, mem->process_info->eviction_fence, NULL, NULL);
+			bo, mem->process_info->eviction_fence);
 	list_del_init(&mem->validate_list.head);
 
 	if (size)
@@ -1624 +1647 @@
 	return 0;
 }
 
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+				      struct dma_buf *dma_buf,
+				      uint64_t va, void *vm,
+				      struct kgd_mem **mem, uint64_t *size,
+				      uint64_t *mmap_offset)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct drm_gem_object *obj;
+	struct amdgpu_bo *bo;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
+		/* Can't handle non-graphics buffers */
+		return -EINVAL;
+
+	obj = dma_buf->priv;
+	if (drm_to_adev(obj->dev) != adev)
+		/* Can't handle buffers from other devices */
+		return -EINVAL;
+
+	bo = gem_to_amdgpu_bo(obj);
+	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+				       AMDGPU_GEM_DOMAIN_GTT)))
+		/* Only VRAM and GTT BOs are supported */
+		return -EINVAL;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+
+	if (size)
+		*size = amdgpu_bo_size(bo);
+
+	if (mmap_offset)
+		*mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	mutex_init(&(*mem)->lock);
+
+	(*mem)->alloc_flags =
+		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
+		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
+		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+
+	drm_gem_object_get(&bo->tbo.base);
+	(*mem)->bo = bo;
+	(*mem)->va = va;
+	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->mapped_to_gpu_memory = 0;
+	(*mem)->process_info = avm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+	amdgpu_sync_create(&(*mem)->sync);
+	(*mem)->is_imported = true;
+
+	return 0;
+}
+
---|
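Editorial note on the new importer above: the sketch below is a minimal, hypothetical caller on the KFD ioctl side, not part of this patch. Only `dma_buf_get()`, `dma_buf_put()` and the `amdgpu_amdkfd_gpuvm_import_dmabuf()` signature added above are taken as given; the `dmabuf_fd` and `va_addr` parameters and the wrapper function name are illustrative.

```c
/* Hypothetical caller sketch, not part of this patch: import a dma-buf
 * fd passed in from user space and wrap it as a kgd_mem object.
 */
static int example_import_dmabuf_fd(struct kgd_dev *kgd, void *vm,
				    int dmabuf_fd, uint64_t va_addr,
				    struct kgd_mem **mem)
{
	struct dma_buf *dmabuf;
	uint64_t size, mmap_offset;
	int r;

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	r = amdgpu_amdkfd_gpuvm_import_dmabuf(kgd, dmabuf, va_addr, vm,
					      mem, &size, &mmap_offset);

	/* On success the importer holds its own GEM reference, so the
	 * dma-buf fd reference can be dropped either way.
	 */
	dma_buf_put(dmabuf);
	return r;
}
```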
1627 | 1709 | /* Evict a userptr BO by stopping the queues if necessary |
---|
1628 | 1710 | * |
---|
1629 | 1711 | * Runs in MMU notifier, may be in RECLAIM_FS context. This means it |
---|
.. | .. |
---|
1639 | 1721 | struct mm_struct *mm) |
---|
1640 | 1722 | { |
---|
1641 | 1723 | struct amdkfd_process_info *process_info = mem->process_info; |
---|
1642 | | - int invalid, evicted_bos; |
---|
| 1724 | + int evicted_bos; |
---|
1643 | 1725 | int r = 0; |
---|
1644 | 1726 | |
---|
1645 | | - invalid = atomic_inc_return(&mem->invalid); |
---|
| 1727 | + atomic_inc(&mem->invalid); |
---|
1646 | 1728 | evicted_bos = atomic_inc_return(&process_info->evicted_bos); |
---|
1647 | 1729 | if (evicted_bos == 1) { |
---|
1648 | 1730 | /* First eviction, stop the queues */ |
---|
1649 | | - r = kgd2kfd->quiesce_mm(mm); |
---|
| 1731 | + r = kgd2kfd_quiesce_mm(mm); |
---|
1650 | 1732 | if (r) |
---|
1651 | 1733 | pr_err("Failed to quiesce KFD\n"); |
---|
1652 | 1734 | schedule_delayed_work(&process_info->restore_userptr_work, |
---|
.. | .. |
---|
1711 | 1793 | |
---|
1712 | 1794 | bo = mem->bo; |
---|
1713 | 1795 | |
---|
1714 | | - if (!mem->user_pages) { |
---|
1715 | | - mem->user_pages = |
---|
1716 | | - kvmalloc_array(bo->tbo.ttm->num_pages, |
---|
1717 | | - sizeof(struct page *), |
---|
1718 | | - GFP_KERNEL | __GFP_ZERO); |
---|
1719 | | - if (!mem->user_pages) { |
---|
1720 | | - pr_err("%s: Failed to allocate pages array\n", |
---|
1721 | | - __func__); |
---|
1722 | | - return -ENOMEM; |
---|
1723 | | - } |
---|
1724 | | - } else if (mem->user_pages[0]) { |
---|
1725 | | - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); |
---|
| 1796 | + /* Get updated user pages */ |
---|
| 1797 | + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); |
---|
| 1798 | + if (ret) { |
---|
| 1799 | + pr_debug("%s: Failed to get user pages: %d\n", |
---|
| 1800 | + __func__, ret); |
---|
| 1801 | + |
---|
| 1802 | + /* Return error -EBUSY or -ENOMEM, retry restore */ |
---|
| 1803 | + return ret; |
---|
1726 | 1804 | } |
---|
1727 | 1805 | |
---|
1728 | | - /* Get updated user pages */ |
---|
1729 | | - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, |
---|
1730 | | - mem->user_pages); |
---|
1731 | | - if (ret) { |
---|
1732 | | - mem->user_pages[0] = NULL; |
---|
1733 | | - pr_info("%s: Failed to get user pages: %d\n", |
---|
1734 | | - __func__, ret); |
---|
1735 | | - /* Pretend it succeeded. It will fail later |
---|
1736 | | - * with a VM fault if the GPU tries to access |
---|
1737 | | - * it. Better than hanging indefinitely with |
---|
1738 | | - * stalled user mode queues. |
---|
1739 | | - */ |
---|
1740 | | - } |
---|
| 1806 | + /* |
---|
| 1807 | + * FIXME: Cannot ignore the return code, must hold |
---|
| 1808 | + * notifier_lock |
---|
| 1809 | + */ |
---|
| 1810 | + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); |
---|
1741 | 1811 | |
---|
1742 | 1812 | /* Mark the BO as valid unless it was invalidated |
---|
1743 | | - * again concurrently |
---|
| 1813 | + * again concurrently. |
---|
1744 | 1814 | */ |
---|
1745 | 1815 | if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) |
---|
1746 | 1816 | return -EAGAIN; |
---|
.. | .. |
---|
1773 | 1843 | GFP_KERNEL); |
---|
1774 | 1844 | if (!pd_bo_list_entries) { |
---|
1775 | 1845 | pr_err("%s: Failed to allocate PD BO list entries\n", __func__); |
---|
1776 | | - return -ENOMEM; |
---|
| 1846 | + ret = -ENOMEM; |
---|
| 1847 | + goto out_no_mem; |
---|
1777 | 1848 | } |
---|
1778 | 1849 | |
---|
1779 | 1850 | INIT_LIST_HEAD(&resv_list); |
---|
.. | .. |
---|
1790 | 1861 | validate_list.head) { |
---|
1791 | 1862 | list_add_tail(&mem->resv_list.head, &resv_list); |
---|
1792 | 1863 | mem->resv_list.bo = mem->validate_list.bo; |
---|
1793 | | - mem->resv_list.shared = mem->validate_list.shared; |
---|
| 1864 | + mem->resv_list.num_shared = mem->validate_list.num_shared; |
---|
1794 | 1865 | } |
---|
1795 | 1866 | |
---|
1796 | 1867 | /* Reserve all BOs and page tables for validation */ |
---|
1797 | 1868 | ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); |
---|
1798 | 1869 | WARN(!list_empty(&duplicates), "Duplicates should be empty"); |
---|
1799 | 1870 | if (ret) |
---|
1800 | | - goto out; |
---|
| 1871 | + goto out_free; |
---|
1801 | 1872 | |
---|
1802 | 1873 | amdgpu_sync_create(&sync); |
---|
1803 | | - |
---|
1804 | | - /* Avoid triggering eviction fences when unmapping invalid |
---|
1805 | | - * userptr BOs (waits for all fences, doesn't use |
---|
1806 | | - * FENCE_OWNER_VM) |
---|
1807 | | - */ |
---|
1808 | | - list_for_each_entry(peer_vm, &process_info->vm_list_head, |
---|
1809 | | - vm_list_node) |
---|
1810 | | - amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, |
---|
1811 | | - process_info->eviction_fence, |
---|
1812 | | - NULL, NULL); |
---|
1813 | 1874 | |
---|
1814 | 1875 | ret = process_validate_vms(process_info); |
---|
1815 | 1876 | if (ret) |
---|
.. | .. |
---|
1823 | 1884 | |
---|
1824 | 1885 | bo = mem->bo; |
---|
1825 | 1886 | |
---|
1826 | | - /* Copy pages array and validate the BO if we got user pages */ |
---|
1827 | | - if (mem->user_pages[0]) { |
---|
1828 | | - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, |
---|
1829 | | - mem->user_pages); |
---|
| 1887 | + /* Validate the BO if we got user pages */ |
---|
| 1888 | + if (bo->tbo.ttm->pages[0]) { |
---|
1830 | 1889 | amdgpu_bo_placement_from_domain(bo, mem->domain); |
---|
1831 | 1890 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
---|
1832 | 1891 | if (ret) { |
---|
.. | .. |
---|
1835 | 1894 | } |
---|
1836 | 1895 | } |
---|
1837 | 1896 | |
---|
1838 | | - /* Validate succeeded, now the BO owns the pages, free |
---|
1839 | | - * our copy of the pointer array. Put this BO back on |
---|
1840 | | - * the userptr_valid_list. If we need to revalidate |
---|
1841 | | - * it, we need to start from scratch. |
---|
1842 | | - */ |
---|
1843 | | - kvfree(mem->user_pages); |
---|
1844 | | - mem->user_pages = NULL; |
---|
1845 | 1897 | list_move_tail(&mem->validate_list.head, |
---|
1846 | 1898 | &process_info->userptr_valid_list); |
---|
1847 | 1899 | |
---|
.. | .. |
---|
1871 | 1923 | ret = process_update_pds(process_info, &sync); |
---|
1872 | 1924 | |
---|
1873 | 1925 | unreserve_out: |
---|
1874 | | - list_for_each_entry(peer_vm, &process_info->vm_list_head, |
---|
1875 | | - vm_list_node) |
---|
1876 | | - amdgpu_bo_fence(peer_vm->root.base.bo, |
---|
1877 | | - &process_info->eviction_fence->base, true); |
---|
1878 | 1926 | ttm_eu_backoff_reservation(&ticket, &resv_list); |
---|
1879 | 1927 | amdgpu_sync_wait(&sync, false); |
---|
1880 | 1928 | amdgpu_sync_free(&sync); |
---|
1881 | | -out: |
---|
| 1929 | +out_free: |
---|
1882 | 1930 | kfree(pd_bo_list_entries); |
---|
| 1931 | +out_no_mem: |
---|
1883 | 1932 | |
---|
1884 | 1933 | return ret; |
---|
1885 | 1934 | } |
---|
.. | .. |
---|
1938 | 1987 | evicted_bos) |
---|
1939 | 1988 | goto unlock_out; |
---|
1940 | 1989 | evicted_bos = 0; |
---|
1941 | | - if (kgd2kfd->resume_mm(mm)) { |
---|
| 1990 | + if (kgd2kfd_resume_mm(mm)) { |
---|
1942 | 1991 | pr_err("%s: Failed to resume KFD\n", __func__); |
---|
1943 | 1992 | /* No recovery from this failure. Probably the CP is |
---|
1944 | 1993 | * hanging. No point trying again. |
---|
1945 | 1994 | */ |
---|
1946 | 1995 | } |
---|
| 1996 | + |
---|
1947 | 1997 | unlock_out: |
---|
1948 | 1998 | mutex_unlock(&process_info->lock); |
---|
1949 | 1999 | mmput(mm); |
---|
.. | .. |
---|
2009 | 2059 | |
---|
2010 | 2060 | list_add_tail(&mem->resv_list.head, &ctx.list); |
---|
2011 | 2061 | mem->resv_list.bo = mem->validate_list.bo; |
---|
2012 | | - mem->resv_list.shared = mem->validate_list.shared; |
---|
| 2062 | + mem->resv_list.num_shared = mem->validate_list.num_shared; |
---|
2013 | 2063 | } |
---|
2014 | 2064 | |
---|
2015 | 2065 | ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, |
---|
.. | .. |
---|
2026 | 2076 | if (ret) |
---|
2027 | 2077 | goto validate_map_fail; |
---|
2028 | 2078 | |
---|
2029 | | - /* Wait for PD/PTs validate to finish */ |
---|
2030 | | - /* FIXME: I think this isn't needed */ |
---|
2031 | | - list_for_each_entry(peer_vm, &process_info->vm_list_head, |
---|
2032 | | - vm_list_node) { |
---|
2033 | | - struct amdgpu_bo *bo = peer_vm->root.base.bo; |
---|
2034 | | - |
---|
2035 | | - ttm_bo_wait(&bo->tbo, false, false); |
---|
| 2079 | + ret = process_sync_pds_resv(process_info, &sync_obj); |
---|
| 2080 | + if (ret) { |
---|
| 2081 | + pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); |
---|
| 2082 | + goto validate_map_fail; |
---|
2036 | 2083 | } |
---|
2037 | 2084 | |
---|
2038 | 2085 | /* Validate BOs and map them to GPUVM (update VM page tables). */ |
---|
.. | .. |
---|
2048 | 2095 | pr_debug("Memory eviction: Validate BOs failed. Try again\n"); |
---|
2049 | 2096 | goto validate_map_fail; |
---|
2050 | 2097 | } |
---|
2051 | | - |
---|
| 2098 | + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); |
---|
| 2099 | + if (ret) { |
---|
| 2100 | + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); |
---|
| 2101 | + goto validate_map_fail; |
---|
| 2102 | + } |
---|
2052 | 2103 | list_for_each_entry(bo_va_entry, &mem->bo_va_list, |
---|
2053 | 2104 | bo_list) { |
---|
2054 | 2105 | ret = update_gpuvm_pte((struct amdgpu_device *) |
---|
.. | .. |
---|
2069 | 2120 | goto validate_map_fail; |
---|
2070 | 2121 | } |
---|
2071 | 2122 | |
---|
| 2123 | + /* Wait for validate and PT updates to finish */ |
---|
2072 | 2124 | amdgpu_sync_wait(&sync_obj, false); |
---|
2073 | 2125 | |
---|
2074 | 2126 | /* Release old eviction fence and create new one, because fence only |
---|
.. | .. |
---|
2087 | 2139 | process_info->eviction_fence = new_fence; |
---|
2088 | 2140 | *ef = dma_fence_get(&new_fence->base); |
---|
2089 | 2141 | |
---|
2090 | | - /* Wait for validate to finish and attach new eviction fence */ |
---|
2091 | | - list_for_each_entry(mem, &process_info->kfd_bo_list, |
---|
2092 | | - validate_list.head) |
---|
2093 | | - ttm_bo_wait(&mem->bo->tbo, false, false); |
---|
| 2142 | + /* Attach new eviction fence to all BOs */ |
---|
2094 | 2143 | list_for_each_entry(mem, &process_info->kfd_bo_list, |
---|
2095 | 2144 | validate_list.head) |
---|
2096 | 2145 | amdgpu_bo_fence(mem->bo, |
---|
.. | .. |
---|
2112 | 2161 | kfree(pd_bo_list); |
---|
2113 | 2162 | return ret; |
---|
2114 | 2163 | } |
---|
| 2164 | + |
---|
| 2165 | +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) |
---|
| 2166 | +{ |
---|
| 2167 | + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; |
---|
| 2168 | + struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; |
---|
| 2169 | + int ret; |
---|
| 2170 | + |
---|
| 2171 | + if (!info || !gws) |
---|
| 2172 | + return -EINVAL; |
---|
| 2173 | + |
---|
| 2174 | + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); |
---|
| 2175 | + if (!*mem) |
---|
| 2176 | + return -ENOMEM; |
---|
| 2177 | + |
---|
| 2178 | + mutex_init(&(*mem)->lock); |
---|
| 2179 | + INIT_LIST_HEAD(&(*mem)->bo_va_list); |
---|
| 2180 | + (*mem)->bo = amdgpu_bo_ref(gws_bo); |
---|
| 2181 | + (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; |
---|
| 2182 | + (*mem)->process_info = process_info; |
---|
| 2183 | + add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); |
---|
| 2184 | + amdgpu_sync_create(&(*mem)->sync); |
---|
| 2185 | + |
---|
| 2186 | + |
---|
| 2187 | + /* Validate gws bo the first time it is added to process */ |
---|
| 2188 | + mutex_lock(&(*mem)->process_info->lock); |
---|
| 2189 | + ret = amdgpu_bo_reserve(gws_bo, false); |
---|
| 2190 | + if (unlikely(ret)) { |
---|
| 2191 | + pr_err("Reserve gws bo failed %d\n", ret); |
---|
| 2192 | + goto bo_reservation_failure; |
---|
| 2193 | + } |
---|
| 2194 | + |
---|
| 2195 | + ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); |
---|
| 2196 | + if (ret) { |
---|
| 2197 | + pr_err("GWS BO validate failed %d\n", ret); |
---|
| 2198 | + goto bo_validation_failure; |
---|
| 2199 | + } |
---|
| 2200 | + /* GWS resource is shared b/t amdgpu and amdkfd |
---|
| 2201 | + * Add process eviction fence to bo so they can |
---|
| 2202 | + * evict each other. |
---|
| 2203 | + */ |
---|
| 2204 | + ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); |
---|
| 2205 | + if (ret) |
---|
| 2206 | + goto reserve_shared_fail; |
---|
| 2207 | + amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); |
---|
| 2208 | + amdgpu_bo_unreserve(gws_bo); |
---|
| 2209 | + mutex_unlock(&(*mem)->process_info->lock); |
---|
| 2210 | + |
---|
| 2211 | + return ret; |
---|
| 2212 | + |
---|
| 2213 | +reserve_shared_fail: |
---|
| 2214 | +bo_validation_failure: |
---|
| 2215 | + amdgpu_bo_unreserve(gws_bo); |
---|
| 2216 | +bo_reservation_failure: |
---|
| 2217 | + mutex_unlock(&(*mem)->process_info->lock); |
---|
| 2218 | + amdgpu_sync_free(&(*mem)->sync); |
---|
| 2219 | + remove_kgd_mem_from_kfd_bo_list(*mem, process_info); |
---|
| 2220 | + amdgpu_bo_unref(&gws_bo); |
---|
| 2221 | + mutex_destroy(&(*mem)->lock); |
---|
| 2222 | + kfree(*mem); |
---|
| 2223 | + *mem = NULL; |
---|
| 2224 | + return ret; |
---|
| 2225 | +} |
---|
| 2226 | + |
---|
| 2227 | +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) |
---|
| 2228 | +{ |
---|
| 2229 | + int ret; |
---|
| 2230 | + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; |
---|
| 2231 | + struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; |
---|
| 2232 | + struct amdgpu_bo *gws_bo = kgd_mem->bo; |
---|
| 2233 | + |
---|
| 2234 | + /* Remove BO from process's validate list so restore worker won't touch |
---|
| 2235 | + * it anymore |
---|
| 2236 | + */ |
---|
| 2237 | + remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); |
---|
| 2238 | + |
---|
| 2239 | + ret = amdgpu_bo_reserve(gws_bo, false); |
---|
| 2240 | + if (unlikely(ret)) { |
---|
| 2241 | + pr_err("Reserve gws bo failed %d\n", ret); |
---|
| 2242 | + //TODO add BO back to validate_list? |
---|
| 2243 | + return ret; |
---|
| 2244 | + } |
---|
| 2245 | + amdgpu_amdkfd_remove_eviction_fence(gws_bo, |
---|
| 2246 | + process_info->eviction_fence); |
---|
| 2247 | + amdgpu_bo_unreserve(gws_bo); |
---|
| 2248 | + amdgpu_sync_free(&kgd_mem->sync); |
---|
| 2249 | + amdgpu_bo_unref(&gws_bo); |
---|
| 2250 | + mutex_destroy(&kgd_mem->lock); |
---|
| 2251 | + kfree(mem); |
---|
| 2252 | + return 0; |
---|
| 2253 | +} |
---|
| 2254 | + |
---|
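Editorial note: the two GWS helpers above are intended to be used as a pair. The sketch below is purely illustrative and not from this patch; the `process_info` and `gws_bo` handles stand in for whatever the KFD side actually passes, and error handling is trimmed.

```c
/* Illustrative pairing of the GWS helpers above (not from this patch).
 * "process_info" is the opaque amdkfd_process_info pointer and "gws_bo"
 * the device's GWS buffer object, both assumed to exist already.
 */
static int example_gws_attach_detach(void *process_info, void *gws_bo)
{
	struct kgd_mem *gws_mem;
	int r;

	r = amdgpu_amdkfd_add_gws_to_process(process_info, gws_bo, &gws_mem);
	if (r)
		return r;

	/* ... user queues may now be programmed with the GWS allocation ... */

	return amdgpu_amdkfd_remove_gws_from_process(process_info, gws_mem);
}
```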
| 2255 | +/* Returns GPU-specific tiling mode information */ |
---|
| 2256 | +int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, |
---|
| 2257 | + struct tile_config *config) |
---|
| 2258 | +{ |
---|
| 2259 | + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
---|
| 2260 | + |
---|
| 2261 | + config->gb_addr_config = adev->gfx.config.gb_addr_config; |
---|
| 2262 | + config->tile_config_ptr = adev->gfx.config.tile_mode_array; |
---|
| 2263 | + config->num_tile_configs = |
---|
| 2264 | + ARRAY_SIZE(adev->gfx.config.tile_mode_array); |
---|
| 2265 | + config->macro_tile_config_ptr = |
---|
| 2266 | + adev->gfx.config.macrotile_mode_array; |
---|
| 2267 | + config->num_macro_tile_configs = |
---|
| 2268 | + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); |
---|
| 2269 | + |
---|
| 2270 | + /* Those values are not set from GFX9 onwards */ |
---|
| 2271 | + config->num_banks = adev->gfx.config.num_banks; |
---|
| 2272 | + config->num_ranks = adev->gfx.config.num_ranks; |
---|
| 2273 | + |
---|
| 2274 | + return 0; |
---|
| 2275 | +} |
---|
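Editorial note: a hedged sketch of how the tile-config query above might be consumed. Only `struct tile_config` and `amdgpu_amdkfd_get_tile_config()` are taken from the patch; the reply structure and its fields are made-up placeholders for illustration.

```c
/* Illustrative reply structure; not a real UAPI type. */
struct example_tile_reply {
	uint32_t gb_addr_config;
	uint32_t num_banks;
	uint32_t num_ranks;
};

/* Hypothetical consumer sketch (not part of this patch): query the
 * tiling information and forward the scalar fields.
 */
static void example_query_tile_config(struct kgd_dev *kgd,
				      struct example_tile_reply *reply)
{
	struct tile_config config = {};

	amdgpu_amdkfd_get_tile_config(kgd, &config);

	reply->gb_addr_config = config.gb_addr_config;
	reply->num_banks = config.num_banks;	/* not set from GFX9 onwards */
	reply->num_ranks = config.num_ranks;
}
```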