| .. | .. |
|---|
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
|---|
| 2 | 2 | /* Copyright (C) 2015-2018 Broadcom */ |
|---|
| 3 | 3 | |
|---|
| 4 | | -#include <linux/reservation.h> |
|---|
| 5 | | -#include <linux/mm_types.h> |
|---|
| 6 | | -#include <drm/drmP.h> |
|---|
| 4 | +#include <linux/delay.h> |
|---|
| 5 | +#include <linux/mutex.h> |
|---|
| 6 | +#include <linux/spinlock_types.h> |
|---|
| 7 | +#include <linux/workqueue.h> |
|---|
| 8 | + |
|---|
| 7 | 9 | #include <drm/drm_encoder.h> |
|---|
| 8 | 10 | #include <drm/drm_gem.h> |
|---|
| 11 | +#include <drm/drm_gem_shmem_helper.h> |
|---|
| 9 | 12 | #include <drm/gpu_scheduler.h> |
|---|
| 13 | + |
|---|
| 14 | +#include "uapi/drm/v3d_drm.h" |
|---|
| 15 | + |
|---|
| 16 | +struct clk; |
|---|
| 17 | +struct platform_device; |
|---|
| 18 | +struct reset_control; |
|---|
| 10 | 19 | |
|---|
| 11 | 20 | #define GMP_GRANULARITY (128 * 1024) |
|---|
| 12 | 21 | |
|---|
| 13 | | -/* Enum for each of the V3D queues. We maintain various queue |
|---|
| 14 | | - * tracking as an array because at some point we'll want to support |
|---|
| 15 | | - * the TFU (texture formatting unit) as another queue. |
|---|
| 16 | | - */ |
|---|
| 22 | +/* Enum for each of the V3D queues. */ |
|---|
| 17 | 23 | enum v3d_queue { |
|---|
| 18 | 24 | V3D_BIN, |
|---|
| 19 | 25 | V3D_RENDER, |
|---|
| 26 | + V3D_TFU, |
|---|
| 27 | + V3D_CSD, |
|---|
| 28 | + V3D_CACHE_CLEAN, |
|---|
| 20 | 29 | }; |
|---|
| 21 | 30 | |
|---|
| 22 | | -#define V3D_MAX_QUEUES (V3D_RENDER + 1) |
|---|
| 31 | +#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) |
|---|
| 23 | 32 | |
|---|
| 24 | 33 | struct v3d_queue_state { |
|---|
| 25 | 34 | struct drm_gpu_scheduler sched; |
|---|
| .. | .. |
|---|
| 35 | 44 | * and revision. |
|---|
| 36 | 45 | */ |
|---|
| 37 | 46 | int ver; |
|---|
| 47 | + bool single_irq_line; |
|---|
| 38 | 48 | |
|---|
| 39 | | - struct device *dev; |
|---|
| 40 | | - struct platform_device *pdev; |
|---|
| 41 | 49 | void __iomem *hub_regs; |
|---|
| 42 | 50 | void __iomem *core_regs[3]; |
|---|
| 43 | 51 | void __iomem *bridge_regs; |
|---|
| 44 | 52 | void __iomem *gca_regs; |
|---|
| 45 | 53 | struct clk *clk; |
|---|
| 54 | + struct reset_control *reset; |
|---|
| 46 | 55 | |
|---|
| 47 | 56 | /* Virtual and DMA addresses of the single shared page table. */ |
|---|
| 48 | 57 | volatile u32 *pt; |
|---|
| .. | .. |
|---|
| 54 | 63 | */ |
|---|
| 55 | 64 | void *mmu_scratch; |
|---|
| 56 | 65 | dma_addr_t mmu_scratch_paddr; |
|---|
| 66 | + /* virtual address bits from V3D to the MMU. */ |
|---|
| 67 | + int va_width; |
|---|
| 57 | 68 | |
|---|
| 58 | 69 | /* Number of V3D cores. */ |
|---|
| 59 | 70 | u32 cores; |
|---|
| .. | .. |
|---|
| 66 | 77 | |
|---|
| 67 | 78 | struct work_struct overflow_mem_work; |
|---|
| 68 | 79 | |
|---|
| 69 | | - struct v3d_exec_info *bin_job; |
|---|
| 70 | | - struct v3d_exec_info *render_job; |
|---|
| 80 | + struct v3d_bin_job *bin_job; |
|---|
| 81 | + struct v3d_render_job *render_job; |
|---|
| 82 | + struct v3d_tfu_job *tfu_job; |
|---|
| 83 | + struct v3d_csd_job *csd_job; |
|---|
| 71 | 84 | |
|---|
| 72 | 85 | struct v3d_queue_state queue[V3D_MAX_QUEUES]; |
|---|
| 73 | 86 | |
|---|
| .. | .. |
|---|
| 90 | 103 | */ |
|---|
| 91 | 104 | struct mutex sched_lock; |
|---|
| 92 | 105 | |
|---|
| 106 | + /* Lock taken during a cache clean and when initiating an L2 |
|---|
| 107 | + * flush, to keep L2 flushes from interfering with the |
|---|
| 108 | + * synchronous L2 cleans. |
|---|
| 109 | + */ |
|---|
| 110 | + struct mutex cache_clean_lock; |
|---|
| 111 | + |
|---|
| 93 | 112 | struct { |
|---|
| 94 | 113 | u32 num_allocated; |
|---|
| 95 | 114 | u32 pages_allocated; |
|---|
| .. | .. |
|---|
| 99 | 118 | static inline struct v3d_dev * |
|---|
| 100 | 119 | to_v3d_dev(struct drm_device *dev) |
|---|
| 101 | 120 | { |
|---|
| 102 | | - return (struct v3d_dev *)dev->dev_private; |
|---|
| 121 | + return container_of(dev, struct v3d_dev, drm); |
|---|
| 103 | 122 | } |
|---|
| 123 | + |
|---|
| 124 | +static inline bool |
|---|
| 125 | +v3d_has_csd(struct v3d_dev *v3d) |
|---|
| 126 | +{ |
|---|
| 127 | + return v3d->ver >= 41; |
|---|
| 128 | +} |
|---|
| 129 | + |
|---|
| 130 | +#define v3d_to_pdev(v3d) to_platform_device((v3d)->drm.dev) |
|---|
| 104 | 131 | |
|---|
| 105 | 132 | /* The per-fd struct, which tracks the MMU mappings. */ |
|---|
| 106 | 133 | struct v3d_file_priv { |
|---|
| .. | .. |
|---|
| 109 | 136 | struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; |
|---|
| 110 | 137 | }; |
|---|
| 111 | 138 | |
|---|
| 112 | | -/* Tracks a mapping of a BO into a per-fd address space */ |
|---|
| 113 | | -struct v3d_vma { |
|---|
| 114 | | - struct v3d_page_table *pt; |
|---|
| 115 | | - struct list_head list; /* entry in v3d_bo.vmas */ |
|---|
| 116 | | -}; |
|---|
| 117 | | - |
|---|
| 118 | 139 | struct v3d_bo { |
|---|
| 119 | | - struct drm_gem_object base; |
|---|
| 120 | | - |
|---|
| 121 | | - struct mutex lock; |
|---|
| 140 | + struct drm_gem_shmem_object base; |
|---|
| 122 | 141 | |
|---|
| 123 | 142 | struct drm_mm_node node; |
|---|
| 124 | 143 | |
|---|
| 125 | | - u32 pages_refcount; |
|---|
| 126 | | - struct page **pages; |
|---|
| 127 | | - struct sg_table *sgt; |
|---|
| 128 | | - void *vaddr; |
|---|
| 129 | | - |
|---|
| 130 | | - struct list_head vmas; /* list of v3d_vma */ |
|---|
| 131 | | - |
|---|
| 132 | 144 | /* List entry for the BO's position in |
|---|
| 133 | | - * v3d_exec_info->unref_list |
|---|
| 145 | + * v3d_render_job->unref_list |
|---|
| 134 | 146 | */ |
|---|
| 135 | 147 | struct list_head unref_head; |
|---|
| 136 | | - |
|---|
| 137 | | - /* normally (resv == &_resv) except for imported bo's */ |
|---|
| 138 | | - struct reservation_object *resv; |
|---|
| 139 | | - struct reservation_object _resv; |
|---|
| 140 | 148 | }; |
|---|
| 141 | 149 | |
|---|
| 142 | 150 | static inline struct v3d_bo * |
|---|
| .. | .. |
|---|
| 174 | 182 | struct v3d_job { |
|---|
| 175 | 183 | struct drm_sched_job base; |
|---|
| 176 | 184 | |
|---|
| 177 | | - struct v3d_exec_info *exec; |
|---|
| 185 | + struct kref refcount; |
|---|
| 178 | 186 | |
|---|
| 179 | | - /* An optional fence userspace can pass in for the job to depend on. */ |
|---|
| 180 | | - struct dma_fence *in_fence; |
|---|
| 187 | + struct v3d_dev *v3d; |
|---|
| 188 | + |
|---|
| 189 | + /* This is the array of BOs that were looked up at the start |
|---|
| 190 | + * of submission. |
|---|
| 191 | + */ |
|---|
| 192 | + struct drm_gem_object **bo; |
|---|
| 193 | + u32 bo_count; |
|---|
| 194 | + |
|---|
| 195 | + /* Array of struct dma_fence * to block on before submitting this job. |
|---|
| 196 | + */ |
|---|
| 197 | + struct xarray deps; |
|---|
| 198 | + unsigned long last_dep; |
|---|
| 181 | 199 | |
|---|
| 182 | 200 | /* v3d fence to be signaled by IRQ handler when the job is complete. */ |
|---|
| 201 | + struct dma_fence *irq_fence; |
|---|
| 202 | + |
|---|
| 203 | + /* scheduler fence for when the job is considered complete and |
|---|
| 204 | + * the BO reservations can be released. |
|---|
| 205 | + */ |
|---|
| 183 | 206 | struct dma_fence *done_fence; |
|---|
| 207 | + |
|---|
| 208 | + /* Callback for the freeing of the job on refcount going to 0. */ |
|---|
| 209 | + void (*free)(struct kref *ref); |
|---|
| 210 | +}; |
|---|
| 211 | + |
|---|
| 212 | +struct v3d_bin_job { |
|---|
| 213 | + struct v3d_job base; |
|---|
| 184 | 214 | |
|---|
| 185 | 215 | /* GPU virtual addresses of the start/end of the CL job. */ |
|---|
| 186 | 216 | u32 start, end; |
|---|
| 187 | 217 | |
|---|
| 188 | 218 | u32 timedout_ctca, timedout_ctra; |
|---|
| 189 | | -}; |
|---|
| 190 | 219 | |
|---|
| 191 | | -struct v3d_exec_info { |
|---|
| 192 | | - struct v3d_dev *v3d; |
|---|
| 193 | | - |
|---|
| 194 | | - struct v3d_job bin, render; |
|---|
| 195 | | - |
|---|
| 196 | | - /* Fence for when the scheduler considers the binner to be |
|---|
| 197 | | - * done, for render to depend on. |
|---|
| 198 | | - */ |
|---|
| 199 | | - struct dma_fence *bin_done_fence; |
|---|
| 200 | | - |
|---|
| 201 | | - struct kref refcount; |
|---|
| 202 | | - |
|---|
| 203 | | - /* This is the array of BOs that were looked up at the start of exec. */ |
|---|
| 204 | | - struct v3d_bo **bo; |
|---|
| 205 | | - u32 bo_count; |
|---|
| 206 | | - |
|---|
| 207 | | - /* List of overflow BOs used in the job that need to be |
|---|
| 208 | | - * released once the job is complete. |
|---|
| 209 | | - */ |
|---|
| 210 | | - struct list_head unref_list; |
|---|
| 220 | + /* Corresponding render job, for attaching our overflow memory. */ |
|---|
| 221 | + struct v3d_render_job *render; |
|---|
| 211 | 222 | |
|---|
| 212 | 223 | /* Submitted tile memory allocation start/size, tile state. */ |
|---|
| 213 | 224 | u32 qma, qms, qts; |
|---|
| 214 | 225 | }; |
|---|
| 215 | 226 | |
|---|
| 227 | +struct v3d_render_job { |
|---|
| 228 | + struct v3d_job base; |
|---|
| 229 | + |
|---|
| 230 | + /* GPU virtual addresses of the start/end of the CL job. */ |
|---|
| 231 | + u32 start, end; |
|---|
| 232 | + |
|---|
| 233 | + u32 timedout_ctca, timedout_ctra; |
|---|
| 234 | + |
|---|
| 235 | + /* List of overflow BOs used in the job that need to be |
|---|
| 236 | + * released once the job is complete. |
|---|
| 237 | + */ |
|---|
| 238 | + struct list_head unref_list; |
|---|
| 239 | +}; |
|---|
| 240 | + |
|---|
| 241 | +struct v3d_tfu_job { |
|---|
| 242 | + struct v3d_job base; |
|---|
| 243 | + |
|---|
| 244 | + struct drm_v3d_submit_tfu args; |
|---|
| 245 | +}; |
|---|
| 246 | + |
|---|
| 247 | +struct v3d_csd_job { |
|---|
| 248 | + struct v3d_job base; |
|---|
| 249 | + |
|---|
| 250 | + u32 timedout_batches; |
|---|
| 251 | + |
|---|
| 252 | + struct drm_v3d_submit_csd args; |
|---|
| 253 | +}; |
|---|
| 254 | + |
|---|
| 216 | 255 | /** |
|---|
| 217 | | - * _wait_for - magic (register) wait macro |
|---|
| 256 | + * __wait_for - magic wait macro |
|---|
| 218 | 257 | * |
|---|
| 219 | | - * Does the right thing for modeset paths when run under kdgb or similar atomic |
|---|
| 220 | | - * contexts. Note that it's important that we check the condition again after |
|---|
| 221 | | - * having timed out, since the timeout could be due to preemption or similar and |
|---|
| 222 | | - * we've never had a chance to check the condition before the timeout. |
|---|
| 258 | + * Macro to help avoid open coding check/wait/timeout patterns. Note that it's |
|---|
| 259 | + * important that we check the condition again after having timed out, since the |
|---|
| 260 | + * timeout could be due to preemption or similar and we've never had a chance to |
|---|
| 261 | + * check the condition before the timeout. |
|---|
| 223 | 262 | */ |
|---|
| 224 | | -#define wait_for(COND, MS) ({ \ |
|---|
| 225 | | - unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ |
|---|
| 226 | | - int ret__ = 0; \ |
|---|
| 227 | | - while (!(COND)) { \ |
|---|
| 228 | | - if (time_after(jiffies, timeout__)) { \ |
|---|
| 229 | | - if (!(COND)) \ |
|---|
| 230 | | - ret__ = -ETIMEDOUT; \ |
|---|
| 263 | +#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ |
|---|
| 264 | + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ |
|---|
| 265 | + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ |
|---|
| 266 | + int ret__; \ |
|---|
| 267 | + might_sleep(); \ |
|---|
| 268 | + for (;;) { \ |
|---|
| 269 | + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ |
|---|
| 270 | + OP; \ |
|---|
| 271 | + /* Guarantee COND check prior to timeout */ \ |
|---|
| 272 | + barrier(); \ |
|---|
| 273 | + if (COND) { \ |
|---|
| 274 | + ret__ = 0; \ |
|---|
| 231 | 275 | break; \ |
|---|
| 232 | 276 | } \ |
|---|
| 233 | | - msleep(1); \ |
|---|
| 277 | + if (expired__) { \ |
|---|
| 278 | + ret__ = -ETIMEDOUT; \ |
|---|
| 279 | + break; \ |
|---|
| 280 | + } \ |
|---|
| 281 | + usleep_range(wait__, wait__ * 2); \ |
|---|
| 282 | + if (wait__ < (Wmax)) \ |
|---|
| 283 | + wait__ <<= 1; \ |
|---|
| 234 | 284 | } \ |
|---|
| 235 | 285 | ret__; \ |
|---|
| 236 | 286 | }) |
|---|
| 287 | + |
|---|
| 288 | +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ |
|---|
| 289 | + (Wmax)) |
|---|
| 290 | +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) |
|---|
| 237 | 291 | |
|---|
| 238 | 292 | static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) |
|---|
| 239 | 293 | { |
|---|
| .. | .. |
|---|
| 246 | 300 | } |
|---|
| 247 | 301 | |
|---|
| 248 | 302 | /* v3d_bo.c */ |
|---|
| 303 | +struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size); |
|---|
| 249 | 304 | void v3d_free_object(struct drm_gem_object *gem_obj); |
|---|
| 250 | 305 | struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, |
|---|
| 251 | 306 | size_t size); |
|---|
| .. | .. |
|---|
| 255 | 310 | struct drm_file *file_priv); |
|---|
| 256 | 311 | int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, |
|---|
| 257 | 312 | struct drm_file *file_priv); |
|---|
| 258 | | -vm_fault_t v3d_gem_fault(struct vm_fault *vmf); |
|---|
| 259 | | -int v3d_mmap(struct file *filp, struct vm_area_struct *vma); |
|---|
| 260 | | -struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj); |
|---|
| 261 | | -int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); |
|---|
| 262 | | -struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj); |
|---|
| 263 | 313 | struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, |
|---|
| 264 | 314 | struct dma_buf_attachment *attach, |
|---|
| 265 | 315 | struct sg_table *sgt); |
|---|
| 266 | 316 | |
|---|
| 267 | 317 | /* v3d_debugfs.c */ |
|---|
| 268 | | -int v3d_debugfs_init(struct drm_minor *minor); |
|---|
| 318 | +void v3d_debugfs_init(struct drm_minor *minor); |
|---|
| 269 | 319 | |
|---|
| 270 | 320 | /* v3d_fence.c */ |
|---|
| 271 | 321 | extern const struct dma_fence_ops v3d_fence_ops; |
|---|
| .. | .. |
|---|
| 276 | 326 | void v3d_gem_destroy(struct drm_device *dev); |
|---|
| 277 | 327 | int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, |
|---|
| 278 | 328 | struct drm_file *file_priv); |
|---|
| 329 | +int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, |
|---|
| 330 | + struct drm_file *file_priv); |
|---|
| 331 | +int v3d_submit_csd_ioctl(struct drm_device *dev, void *data, |
|---|
| 332 | + struct drm_file *file_priv); |
|---|
| 279 | 333 | int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, |
|---|
| 280 | 334 | struct drm_file *file_priv); |
|---|
| 281 | | -void v3d_exec_put(struct v3d_exec_info *exec); |
|---|
| 335 | +void v3d_job_put(struct v3d_job *job); |
|---|
| 282 | 336 | void v3d_reset(struct v3d_dev *v3d); |
|---|
| 283 | 337 | void v3d_invalidate_caches(struct v3d_dev *v3d); |
|---|
| 284 | | -void v3d_flush_caches(struct v3d_dev *v3d); |
|---|
| 338 | +void v3d_clean_caches(struct v3d_dev *v3d); |
|---|
| 285 | 339 | |
|---|
| 286 | 340 | /* v3d_irq.c */ |
|---|
| 287 | 341 | int v3d_irq_init(struct v3d_dev *v3d); |
|---|