| .. | .. |
|---|
| 27 | 27 | * |
|---|
| 28 | 28 | */ |
|---|
| 29 | 29 | |
|---|
| 30 | | -#include <generated/utsrelease.h> |
|---|
| 31 | | -#include <linux/stop_machine.h> |
|---|
| 32 | | -#include <linux/zlib.h> |
|---|
| 33 | | -#include <drm/drm_print.h> |
|---|
| 34 | 30 | #include <linux/ascii85.h> |
|---|
| 31 | +#include <linux/nmi.h> |
|---|
| 32 | +#include <linux/pagevec.h> |
|---|
| 33 | +#include <linux/scatterlist.h> |
|---|
| 34 | +#include <linux/utsname.h> |
|---|
| 35 | +#include <linux/zlib.h> |
|---|
| 35 | 36 | |
|---|
| 36 | | -#include "i915_gpu_error.h" |
|---|
| 37 | +#include <drm/drm_print.h> |
|---|
| 38 | + |
|---|
| 39 | +#include "display/intel_atomic.h" |
|---|
| 40 | +#include "display/intel_csr.h" |
|---|
| 41 | +#include "display/intel_overlay.h" |
|---|
| 42 | + |
|---|
| 43 | +#include "gem/i915_gem_context.h" |
|---|
| 44 | +#include "gem/i915_gem_lmem.h" |
|---|
| 45 | +#include "gt/intel_gt.h" |
|---|
| 46 | +#include "gt/intel_gt_pm.h" |
|---|
| 47 | + |
|---|
| 37 | 48 | #include "i915_drv.h" |
|---|
| 49 | +#include "i915_gpu_error.h" |
|---|
| 50 | +#include "i915_memcpy.h" |
|---|
| 51 | +#include "i915_scatterlist.h" |
|---|
| 38 | 52 | |
|---|
| 39 | | -static inline const struct intel_engine_cs * |
|---|
| 40 | | -engine_lookup(const struct drm_i915_private *i915, unsigned int id) |
|---|
| 53 | +#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) |
|---|
| 54 | +#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN) |
|---|
| 55 | + |
|---|
| 56 | +static void __sg_set_buf(struct scatterlist *sg, |
|---|
| 57 | + void *addr, unsigned int len, loff_t it) |
|---|
| 41 | 58 | { |
|---|
| 42 | | - if (id >= I915_NUM_ENGINES) |
|---|
| 43 | | - return NULL; |
|---|
| 44 | | - |
|---|
| 45 | | - return i915->engine[id]; |
|---|
| 59 | + sg->page_link = (unsigned long)virt_to_page(addr); |
|---|
| 60 | + sg->offset = offset_in_page(addr); |
|---|
| 61 | + sg->length = len; |
|---|
| 62 | + sg->dma_address = it; |
|---|
| 46 | 63 | } |
|---|
| 47 | 64 | |
|---|
| 48 | | -static inline const char * |
|---|
| 49 | | -__engine_name(const struct intel_engine_cs *engine) |
|---|
| 65 | +static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len) |
|---|
| 50 | 66 | { |
|---|
| 51 | | - return engine ? engine->name : ""; |
|---|
| 52 | | -} |
|---|
| 53 | | - |
|---|
| 54 | | -static const char * |
|---|
| 55 | | -engine_name(const struct drm_i915_private *i915, unsigned int id) |
|---|
| 56 | | -{ |
|---|
| 57 | | - return __engine_name(engine_lookup(i915, id)); |
|---|
| 58 | | -} |
|---|
| 59 | | - |
|---|
| 60 | | -static const char *tiling_flag(int tiling) |
|---|
| 61 | | -{ |
|---|
| 62 | | - switch (tiling) { |
|---|
| 63 | | - default: |
|---|
| 64 | | - case I915_TILING_NONE: return ""; |
|---|
| 65 | | - case I915_TILING_X: return " X"; |
|---|
| 66 | | - case I915_TILING_Y: return " Y"; |
|---|
| 67 | | - } |
|---|
| 68 | | -} |
|---|
| 69 | | - |
|---|
| 70 | | -static const char *dirty_flag(int dirty) |
|---|
| 71 | | -{ |
|---|
| 72 | | - return dirty ? " dirty" : ""; |
|---|
| 73 | | -} |
|---|
| 74 | | - |
|---|
| 75 | | -static const char *purgeable_flag(int purgeable) |
|---|
| 76 | | -{ |
|---|
| 77 | | - return purgeable ? " purgeable" : ""; |
|---|
| 78 | | -} |
|---|
| 79 | | - |
|---|
| 80 | | -static bool __i915_error_ok(struct drm_i915_error_state_buf *e) |
|---|
| 81 | | -{ |
|---|
| 82 | | - |
|---|
| 83 | | - if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) { |
|---|
| 84 | | - e->err = -ENOSPC; |
|---|
| 85 | | - return false; |
|---|
| 86 | | - } |
|---|
| 87 | | - |
|---|
| 88 | | - if (e->bytes == e->size - 1 || e->err) |
|---|
| 67 | + if (!len) |
|---|
| 89 | 68 | return false; |
|---|
| 90 | 69 | |
|---|
| 91 | | - return true; |
|---|
| 92 | | -} |
|---|
| 70 | + if (e->bytes + len + 1 <= e->size) |
|---|
| 71 | + return true; |
|---|
| 93 | 72 | |
|---|
| 94 | | -static bool __i915_error_seek(struct drm_i915_error_state_buf *e, |
|---|
| 95 | | - unsigned len) |
|---|
| 96 | | -{ |
|---|
| 97 | | - if (e->pos + len <= e->start) { |
|---|
| 98 | | - e->pos += len; |
|---|
| 99 | | - return false; |
|---|
| 73 | + if (e->bytes) { |
|---|
| 74 | + __sg_set_buf(e->cur++, e->buf, e->bytes, e->iter); |
|---|
| 75 | + e->iter += e->bytes; |
|---|
| 76 | + e->buf = NULL; |
|---|
| 77 | + e->bytes = 0; |
|---|
| 100 | 78 | } |
|---|
| 101 | 79 | |
|---|
| 102 | | - /* First vsnprintf needs to fit in its entirety for memmove */ |
|---|
| 103 | | - if (len >= e->size) { |
|---|
| 104 | | - e->err = -EIO; |
|---|
| 105 | | - return false; |
|---|
| 106 | | - } |
|---|
| 80 | + if (e->cur == e->end) { |
|---|
| 81 | + struct scatterlist *sgl; |
|---|
| 107 | 82 | |
|---|
| 108 | | - return true; |
|---|
| 109 | | -} |
|---|
| 110 | | - |
|---|
| 111 | | -static void __i915_error_advance(struct drm_i915_error_state_buf *e, |
|---|
| 112 | | - unsigned len) |
|---|
| 113 | | -{ |
|---|
| 114 | | - /* If this is first printf in this window, adjust it so that |
|---|
| 115 | | - * start position matches start of the buffer |
|---|
| 116 | | - */ |
|---|
| 117 | | - |
|---|
| 118 | | - if (e->pos < e->start) { |
|---|
| 119 | | - const size_t off = e->start - e->pos; |
|---|
| 120 | | - |
|---|
| 121 | | - /* Should not happen but be paranoid */ |
|---|
| 122 | | - if (off > len || e->bytes) { |
|---|
| 123 | | - e->err = -EIO; |
|---|
| 124 | | - return; |
|---|
| 83 | + sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL); |
|---|
| 84 | + if (!sgl) { |
|---|
| 85 | + e->err = -ENOMEM; |
|---|
| 86 | + return false; |
|---|
| 125 | 87 | } |
|---|
| 126 | 88 | |
|---|
| 127 | | - memmove(e->buf, e->buf + off, len - off); |
|---|
| 128 | | - e->bytes = len - off; |
|---|
| 129 | | - e->pos = e->start; |
|---|
| 130 | | - return; |
|---|
| 89 | + if (e->cur) { |
|---|
| 90 | + e->cur->offset = 0; |
|---|
| 91 | + e->cur->length = 0; |
|---|
| 92 | + e->cur->page_link = |
|---|
| 93 | + (unsigned long)sgl | SG_CHAIN; |
|---|
| 94 | + } else { |
|---|
| 95 | + e->sgl = sgl; |
|---|
| 96 | + } |
|---|
| 97 | + |
|---|
| 98 | + e->cur = sgl; |
|---|
| 99 | + e->end = sgl + SG_MAX_SINGLE_ALLOC - 1; |
|---|
| 131 | 100 | } |
|---|
| 132 | 101 | |
|---|
| 133 | | - e->bytes += len; |
|---|
| 134 | | - e->pos += len; |
|---|
| 102 | + e->size = ALIGN(len + 1, SZ_64K); |
|---|
| 103 | + e->buf = kmalloc(e->size, ALLOW_FAIL); |
|---|
| 104 | + if (!e->buf) { |
|---|
| 105 | + e->size = PAGE_ALIGN(len + 1); |
|---|
| 106 | + e->buf = kmalloc(e->size, GFP_KERNEL); |
|---|
| 107 | + } |
|---|
| 108 | + if (!e->buf) { |
|---|
| 109 | + e->err = -ENOMEM; |
|---|
| 110 | + return false; |
|---|
| 111 | + } |
|---|
| 112 | + |
|---|
| 113 | + return true; |
|---|
| 135 | 114 | } |
|---|
| 136 | 115 | |
|---|
| 137 | 116 | __printf(2, 0) |
|---|
| 138 | 117 | static void i915_error_vprintf(struct drm_i915_error_state_buf *e, |
|---|
| 139 | | - const char *f, va_list args) |
|---|
| 118 | + const char *fmt, va_list args) |
|---|
| 140 | 119 | { |
|---|
| 141 | | - unsigned len; |
|---|
| 120 | + va_list ap; |
|---|
| 121 | + int len; |
|---|
| 142 | 122 | |
|---|
| 143 | | - if (!__i915_error_ok(e)) |
|---|
| 123 | + if (e->err) |
|---|
| 144 | 124 | return; |
|---|
| 145 | 125 | |
|---|
| 146 | | - /* Seek the first printf which is hits start position */ |
|---|
| 147 | | - if (e->pos < e->start) { |
|---|
| 148 | | - va_list tmp; |
|---|
| 149 | | - |
|---|
| 150 | | - va_copy(tmp, args); |
|---|
| 151 | | - len = vsnprintf(NULL, 0, f, tmp); |
|---|
| 152 | | - va_end(tmp); |
|---|
| 153 | | - |
|---|
| 154 | | - if (!__i915_error_seek(e, len)) |
|---|
| 155 | | - return; |
|---|
| 126 | + va_copy(ap, args); |
|---|
| 127 | + len = vsnprintf(NULL, 0, fmt, ap); |
|---|
| 128 | + va_end(ap); |
|---|
| 129 | + if (len <= 0) { |
|---|
| 130 | + e->err = len; |
|---|
| 131 | + return; |
|---|
| 156 | 132 | } |
|---|
| 157 | 133 | |
|---|
| 158 | | - len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args); |
|---|
| 159 | | - if (len >= e->size - e->bytes) |
|---|
| 160 | | - len = e->size - e->bytes - 1; |
|---|
| 134 | + if (!__i915_error_grow(e, len)) |
|---|
| 135 | + return; |
|---|
| 161 | 136 | |
|---|
| 162 | | - __i915_error_advance(e, len); |
|---|
| 137 | + GEM_BUG_ON(e->bytes >= e->size); |
|---|
| 138 | + len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args); |
|---|
| 139 | + if (len < 0) { |
|---|
| 140 | + e->err = len; |
|---|
| 141 | + return; |
|---|
| 142 | + } |
|---|
| 143 | + e->bytes += len; |
|---|
| 163 | 144 | } |
|---|
| 164 | 145 | |
|---|
| 165 | | -static void i915_error_puts(struct drm_i915_error_state_buf *e, |
|---|
| 166 | | - const char *str) |
|---|
| 146 | +static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str) |
|---|
| 167 | 147 | { |
|---|
| 168 | 148 | unsigned len; |
|---|
| 169 | 149 | |
|---|
| 170 | | - if (!__i915_error_ok(e)) |
|---|
| 150 | + if (e->err || !str) |
|---|
| 171 | 151 | return; |
|---|
| 172 | 152 | |
|---|
| 173 | 153 | len = strlen(str); |
|---|
| 154 | + if (!__i915_error_grow(e, len)) |
|---|
| 155 | + return; |
|---|
| 174 | 156 | |
|---|
| 175 | | - /* Seek the first printf which is hits start position */ |
|---|
| 176 | | - if (e->pos < e->start) { |
|---|
| 177 | | - if (!__i915_error_seek(e, len)) |
|---|
| 178 | | - return; |
|---|
| 179 | | - } |
|---|
| 180 | | - |
|---|
| 181 | | - if (len >= e->size - e->bytes) |
|---|
| 182 | | - len = e->size - e->bytes - 1; |
|---|
| 157 | + GEM_BUG_ON(e->bytes + len > e->size); |
|---|
| 183 | 158 | memcpy(e->buf + e->bytes, str, len); |
|---|
| 184 | | - |
|---|
| 185 | | - __i915_error_advance(e, len); |
|---|
| 159 | + e->bytes += len; |
|---|
| 186 | 160 | } |
|---|
| 187 | 161 | |
|---|
| 188 | 162 | #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) |
|---|
| .. | .. |
|---|
| 203 | 177 | return p; |
|---|
| 204 | 178 | } |
|---|
| 205 | 179 | |
|---|
| 180 | +/* single threaded page allocator with a reserved stash for emergencies */ |
|---|
| 181 | +static void pool_fini(struct pagevec *pv) |
|---|
| 182 | +{ |
|---|
| 183 | + pagevec_release(pv); |
|---|
| 184 | +} |
|---|
| 185 | + |
|---|
| 186 | +static int pool_refill(struct pagevec *pv, gfp_t gfp) |
|---|
| 187 | +{ |
|---|
| 188 | + while (pagevec_space(pv)) { |
|---|
| 189 | + struct page *p; |
|---|
| 190 | + |
|---|
| 191 | + p = alloc_page(gfp); |
|---|
| 192 | + if (!p) |
|---|
| 193 | + return -ENOMEM; |
|---|
| 194 | + |
|---|
| 195 | + pagevec_add(pv, p); |
|---|
| 196 | + } |
|---|
| 197 | + |
|---|
| 198 | + return 0; |
|---|
| 199 | +} |
|---|
| 200 | + |
|---|
| 201 | +static int pool_init(struct pagevec *pv, gfp_t gfp) |
|---|
| 202 | +{ |
|---|
| 203 | + int err; |
|---|
| 204 | + |
|---|
| 205 | + pagevec_init(pv); |
|---|
| 206 | + |
|---|
| 207 | + err = pool_refill(pv, gfp); |
|---|
| 208 | + if (err) |
|---|
| 209 | + pool_fini(pv); |
|---|
| 210 | + |
|---|
| 211 | + return err; |
|---|
| 212 | +} |
|---|
| 213 | + |
|---|
| 214 | +static void *pool_alloc(struct pagevec *pv, gfp_t gfp) |
|---|
| 215 | +{ |
|---|
| 216 | + struct page *p; |
|---|
| 217 | + |
|---|
| 218 | + p = alloc_page(gfp); |
|---|
| 219 | + if (!p && pagevec_count(pv)) |
|---|
| 220 | + p = pv->pages[--pv->nr]; |
|---|
| 221 | + |
|---|
| 222 | + return p ? page_address(p) : NULL; |
|---|
| 223 | +} |
|---|
| 224 | + |
|---|
| 225 | +static void pool_free(struct pagevec *pv, void *addr) |
|---|
| 226 | +{ |
|---|
| 227 | + struct page *p = virt_to_page(addr); |
|---|
| 228 | + |
|---|
| 229 | + if (pagevec_space(pv)) |
|---|
| 230 | + pagevec_add(pv, p); |
|---|
| 231 | + else |
|---|
| 232 | + __free_page(p); |
|---|
| 233 | +} |
|---|
| 234 | + |
|---|
| 206 | 235 | #ifdef CONFIG_DRM_I915_COMPRESS_ERROR |
|---|
| 207 | 236 | |
|---|
| 208 | | -struct compress { |
|---|
| 237 | +struct i915_vma_compress { |
|---|
| 238 | + struct pagevec pool; |
|---|
| 209 | 239 | struct z_stream_s zstream; |
|---|
| 210 | 240 | void *tmp; |
|---|
| 211 | 241 | }; |
|---|
| 212 | 242 | |
|---|
| 213 | | -static bool compress_init(struct compress *c) |
|---|
| 243 | +static bool compress_init(struct i915_vma_compress *c) |
|---|
| 214 | 244 | { |
|---|
| 215 | | - struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream)); |
|---|
| 245 | + struct z_stream_s *zstream = &c->zstream; |
|---|
| 246 | + |
|---|
| 247 | + if (pool_init(&c->pool, ALLOW_FAIL)) |
|---|
| 248 | + return false; |
|---|
| 216 | 249 | |
|---|
| 217 | 250 | zstream->workspace = |
|---|
| 218 | 251 | kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), |
|---|
| 219 | | - GFP_ATOMIC | __GFP_NOWARN); |
|---|
| 220 | | - if (!zstream->workspace) |
|---|
| 221 | | - return false; |
|---|
| 222 | | - |
|---|
| 223 | | - if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) { |
|---|
| 224 | | - kfree(zstream->workspace); |
|---|
| 252 | + ALLOW_FAIL); |
|---|
| 253 | + if (!zstream->workspace) { |
|---|
| 254 | + pool_fini(&c->pool); |
|---|
| 225 | 255 | return false; |
|---|
| 226 | 256 | } |
|---|
| 227 | 257 | |
|---|
| 228 | 258 | c->tmp = NULL; |
|---|
| 229 | 259 | if (i915_has_memcpy_from_wc()) |
|---|
| 230 | | - c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN); |
|---|
| 260 | + c->tmp = pool_alloc(&c->pool, ALLOW_FAIL); |
|---|
| 231 | 261 | |
|---|
| 232 | 262 | return true; |
|---|
| 233 | 263 | } |
|---|
| 234 | 264 | |
|---|
| 235 | | -static void *compress_next_page(struct drm_i915_error_object *dst) |
|---|
| 265 | +static bool compress_start(struct i915_vma_compress *c) |
|---|
| 236 | 266 | { |
|---|
| 237 | | - unsigned long page; |
|---|
| 267 | + struct z_stream_s *zstream = &c->zstream; |
|---|
| 268 | + void *workspace = zstream->workspace; |
|---|
| 269 | + |
|---|
| 270 | + memset(zstream, 0, sizeof(*zstream)); |
|---|
| 271 | + zstream->workspace = workspace; |
|---|
| 272 | + |
|---|
| 273 | + return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK; |
|---|
| 274 | +} |
|---|
| 275 | + |
|---|
| 276 | +static void *compress_next_page(struct i915_vma_compress *c, |
|---|
| 277 | + struct i915_vma_coredump *dst) |
|---|
| 278 | +{ |
|---|
| 279 | + void *page; |
|---|
| 238 | 280 | |
|---|
| 239 | 281 | if (dst->page_count >= dst->num_pages) |
|---|
| 240 | 282 | return ERR_PTR(-ENOSPC); |
|---|
| 241 | 283 | |
|---|
| 242 | | - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); |
|---|
| 284 | + page = pool_alloc(&c->pool, ALLOW_FAIL); |
|---|
| 243 | 285 | if (!page) |
|---|
| 244 | 286 | return ERR_PTR(-ENOMEM); |
|---|
| 245 | 287 | |
|---|
| 246 | | - return dst->pages[dst->page_count++] = (void *)page; |
|---|
| 288 | + return dst->pages[dst->page_count++] = page; |
|---|
| 247 | 289 | } |
|---|
| 248 | 290 | |
|---|
| 249 | | -static int compress_page(struct compress *c, |
|---|
| 291 | +static int compress_page(struct i915_vma_compress *c, |
|---|
| 250 | 292 | void *src, |
|---|
| 251 | | - struct drm_i915_error_object *dst) |
|---|
| 293 | + struct i915_vma_coredump *dst, |
|---|
| 294 | + bool wc) |
|---|
| 252 | 295 | { |
|---|
| 253 | 296 | struct z_stream_s *zstream = &c->zstream; |
|---|
| 254 | 297 | |
|---|
| 255 | 298 | zstream->next_in = src; |
|---|
| 256 | | - if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) |
|---|
| 299 | + if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) |
|---|
| 257 | 300 | zstream->next_in = c->tmp; |
|---|
| 258 | 301 | zstream->avail_in = PAGE_SIZE; |
|---|
| 259 | 302 | |
|---|
| 260 | 303 | do { |
|---|
| 261 | 304 | if (zstream->avail_out == 0) { |
|---|
| 262 | | - zstream->next_out = compress_next_page(dst); |
|---|
| 305 | + zstream->next_out = compress_next_page(c, dst); |
|---|
| 263 | 306 | if (IS_ERR(zstream->next_out)) |
|---|
| 264 | 307 | return PTR_ERR(zstream->next_out); |
|---|
| 265 | 308 | |
|---|
| .. | .. |
|---|
| 279 | 322 | return 0; |
|---|
| 280 | 323 | } |
|---|
| 281 | 324 | |
|---|
| 282 | | -static int compress_flush(struct compress *c, |
|---|
| 283 | | - struct drm_i915_error_object *dst) |
|---|
| 325 | +static int compress_flush(struct i915_vma_compress *c, |
|---|
| 326 | + struct i915_vma_coredump *dst) |
|---|
| 284 | 327 | { |
|---|
| 285 | 328 | struct z_stream_s *zstream = &c->zstream; |
|---|
| 286 | 329 | |
|---|
| 287 | 330 | do { |
|---|
| 288 | 331 | switch (zlib_deflate(zstream, Z_FINISH)) { |
|---|
| 289 | 332 | case Z_OK: /* more space requested */ |
|---|
| 290 | | - zstream->next_out = compress_next_page(dst); |
|---|
| 333 | + zstream->next_out = compress_next_page(c, dst); |
|---|
| 291 | 334 | if (IS_ERR(zstream->next_out)) |
|---|
| 292 | 335 | return PTR_ERR(zstream->next_out); |
|---|
| 293 | 336 | |
|---|
| .. | .. |
|---|
| 308 | 351 | return 0; |
|---|
| 309 | 352 | } |
|---|
| 310 | 353 | |
|---|
| 311 | | -static void compress_fini(struct compress *c, |
|---|
| 312 | | - struct drm_i915_error_object *dst) |
|---|
| 354 | +static void compress_finish(struct i915_vma_compress *c) |
|---|
| 313 | 355 | { |
|---|
| 314 | | - struct z_stream_s *zstream = &c->zstream; |
|---|
| 356 | + zlib_deflateEnd(&c->zstream); |
|---|
| 357 | +} |
|---|
| 315 | 358 | |
|---|
| 316 | | - zlib_deflateEnd(zstream); |
|---|
| 317 | | - kfree(zstream->workspace); |
|---|
| 359 | +static void compress_fini(struct i915_vma_compress *c) |
|---|
| 360 | +{ |
|---|
| 361 | + kfree(c->zstream.workspace); |
|---|
| 318 | 362 | if (c->tmp) |
|---|
| 319 | | - free_page((unsigned long)c->tmp); |
|---|
| 363 | + pool_free(&c->pool, c->tmp); |
|---|
| 364 | + pool_fini(&c->pool); |
|---|
| 320 | 365 | } |
|---|
| 321 | 366 | |
|---|
| 322 | 367 | static void err_compression_marker(struct drm_i915_error_state_buf *m) |
|---|
| .. | .. |
|---|
| 326 | 371 | |
|---|
| 327 | 372 | #else |
|---|
| 328 | 373 | |
|---|
| 329 | | -struct compress { |
|---|
| 374 | +struct i915_vma_compress { |
|---|
| 375 | + struct pagevec pool; |
|---|
| 330 | 376 | }; |
|---|
| 331 | 377 | |
|---|
| 332 | | -static bool compress_init(struct compress *c) |
|---|
| 378 | +static bool compress_init(struct i915_vma_compress *c) |
|---|
| 379 | +{ |
|---|
| 380 | + return pool_init(&c->pool, ALLOW_FAIL) == 0; |
|---|
| 381 | +} |
|---|
| 382 | + |
|---|
| 383 | +static bool compress_start(struct i915_vma_compress *c) |
|---|
| 333 | 384 | { |
|---|
| 334 | 385 | return true; |
|---|
| 335 | 386 | } |
|---|
| 336 | 387 | |
|---|
| 337 | | -static int compress_page(struct compress *c, |
|---|
| 388 | +static int compress_page(struct i915_vma_compress *c, |
|---|
| 338 | 389 | void *src, |
|---|
| 339 | | - struct drm_i915_error_object *dst) |
|---|
| 390 | + struct i915_vma_coredump *dst, |
|---|
| 391 | + bool wc) |
|---|
| 340 | 392 | { |
|---|
| 341 | | - unsigned long page; |
|---|
| 342 | 393 | void *ptr; |
|---|
| 343 | 394 | |
|---|
| 344 | | - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); |
|---|
| 345 | | - if (!page) |
|---|
| 395 | + ptr = pool_alloc(&c->pool, ALLOW_FAIL); |
|---|
| 396 | + if (!ptr) |
|---|
| 346 | 397 | return -ENOMEM; |
|---|
| 347 | 398 | |
|---|
| 348 | | - ptr = (void *)page; |
|---|
| 349 | | - if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) |
|---|
| 399 | + if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) |
|---|
| 350 | 400 | memcpy(ptr, src, PAGE_SIZE); |
|---|
| 351 | 401 | dst->pages[dst->page_count++] = ptr; |
|---|
| 352 | 402 | cond_resched(); |
|---|
| .. | .. |
|---|
| 354 | 404 | return 0; |
|---|
| 355 | 405 | } |
|---|
| 356 | 406 | |
|---|
| 357 | | -static int compress_flush(struct compress *c, |
|---|
| 358 | | - struct drm_i915_error_object *dst) |
|---|
| 407 | +static int compress_flush(struct i915_vma_compress *c, |
|---|
| 408 | + struct i915_vma_coredump *dst) |
|---|
| 359 | 409 | { |
|---|
| 360 | 410 | return 0; |
|---|
| 361 | 411 | } |
|---|
| 362 | 412 | |
|---|
| 363 | | -static void compress_fini(struct compress *c, |
|---|
| 364 | | - struct drm_i915_error_object *dst) |
|---|
| 413 | +static void compress_finish(struct i915_vma_compress *c) |
|---|
| 365 | 414 | { |
|---|
| 415 | +} |
|---|
| 416 | + |
|---|
| 417 | +static void compress_fini(struct i915_vma_compress *c) |
|---|
| 418 | +{ |
|---|
| 419 | + pool_fini(&c->pool); |
|---|
| 366 | 420 | } |
|---|
| 367 | 421 | |
|---|
| 368 | 422 | static void err_compression_marker(struct drm_i915_error_state_buf *m) |
|---|
| .. | .. |
|---|
| 372 | 426 | |
|---|
| 373 | 427 | #endif |
|---|
| 374 | 428 | |
|---|
| 375 | | -static void print_error_buffers(struct drm_i915_error_state_buf *m, |
|---|
| 376 | | - const char *name, |
|---|
| 377 | | - struct drm_i915_error_buffer *err, |
|---|
| 378 | | - int count) |
|---|
| 379 | | -{ |
|---|
| 380 | | - err_printf(m, "%s [%d]:\n", name, count); |
|---|
| 381 | | - |
|---|
| 382 | | - while (count--) { |
|---|
| 383 | | - err_printf(m, " %08x_%08x %8u %02x %02x %02x", |
|---|
| 384 | | - upper_32_bits(err->gtt_offset), |
|---|
| 385 | | - lower_32_bits(err->gtt_offset), |
|---|
| 386 | | - err->size, |
|---|
| 387 | | - err->read_domains, |
|---|
| 388 | | - err->write_domain, |
|---|
| 389 | | - err->wseqno); |
|---|
| 390 | | - err_puts(m, tiling_flag(err->tiling)); |
|---|
| 391 | | - err_puts(m, dirty_flag(err->dirty)); |
|---|
| 392 | | - err_puts(m, purgeable_flag(err->purgeable)); |
|---|
| 393 | | - err_puts(m, err->userptr ? " userptr" : ""); |
|---|
| 394 | | - err_puts(m, err->engine != -1 ? " " : ""); |
|---|
| 395 | | - err_puts(m, engine_name(m->i915, err->engine)); |
|---|
| 396 | | - err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); |
|---|
| 397 | | - |
|---|
| 398 | | - if (err->name) |
|---|
| 399 | | - err_printf(m, " (name: %d)", err->name); |
|---|
| 400 | | - if (err->fence_reg != I915_FENCE_REG_NONE) |
|---|
| 401 | | - err_printf(m, " (fence: %d)", err->fence_reg); |
|---|
| 402 | | - |
|---|
| 403 | | - err_puts(m, "\n"); |
|---|
| 404 | | - err++; |
|---|
| 405 | | - } |
|---|
| 406 | | -} |
|---|
| 407 | | - |
|---|
| 408 | 429 | static void error_print_instdone(struct drm_i915_error_state_buf *m, |
|---|
| 409 | | - const struct drm_i915_error_engine *ee) |
|---|
| 430 | + const struct intel_engine_coredump *ee) |
|---|
| 410 | 431 | { |
|---|
| 432 | + const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; |
|---|
| 411 | 433 | int slice; |
|---|
| 412 | 434 | int subslice; |
|---|
| 413 | 435 | |
|---|
| 414 | 436 | err_printf(m, " INSTDONE: 0x%08x\n", |
|---|
| 415 | 437 | ee->instdone.instdone); |
|---|
| 416 | 438 | |
|---|
| 417 | | - if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3) |
|---|
| 439 | + if (ee->engine->class != RENDER_CLASS || INTEL_GEN(m->i915) <= 3) |
|---|
| 418 | 440 | return; |
|---|
| 419 | 441 | |
|---|
| 420 | 442 | err_printf(m, " SC_INSTDONE: 0x%08x\n", |
|---|
| .. | .. |
|---|
| 423 | 445 | if (INTEL_GEN(m->i915) <= 6) |
|---|
| 424 | 446 | return; |
|---|
| 425 | 447 | |
|---|
| 426 | | - for_each_instdone_slice_subslice(m->i915, slice, subslice) |
|---|
| 448 | + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) |
|---|
| 427 | 449 | err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", |
|---|
| 428 | 450 | slice, subslice, |
|---|
| 429 | 451 | ee->instdone.sampler[slice][subslice]); |
|---|
| 430 | 452 | |
|---|
| 431 | | - for_each_instdone_slice_subslice(m->i915, slice, subslice) |
|---|
| 453 | + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) |
|---|
| 432 | 454 | err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", |
|---|
| 433 | 455 | slice, subslice, |
|---|
| 434 | 456 | ee->instdone.row[slice][subslice]); |
|---|
| 435 | | -} |
|---|
| 436 | 457 | |
|---|
| 437 | | -static const char *bannable(const struct drm_i915_error_context *ctx) |
|---|
| 438 | | -{ |
|---|
| 439 | | - return ctx->bannable ? "" : " (unbannable)"; |
|---|
| 458 | + if (INTEL_GEN(m->i915) < 12) |
|---|
| 459 | + return; |
|---|
| 460 | + |
|---|
| 461 | + err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n", |
|---|
| 462 | + ee->instdone.slice_common_extra[0]); |
|---|
| 463 | + err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n", |
|---|
| 464 | + ee->instdone.slice_common_extra[1]); |
|---|
| 440 | 465 | } |
|---|
| 441 | 466 | |
|---|
| 442 | 467 | static void error_print_request(struct drm_i915_error_state_buf *m, |
|---|
| 443 | 468 | const char *prefix, |
|---|
| 444 | | - const struct drm_i915_error_request *erq, |
|---|
| 445 | | - const unsigned long epoch) |
|---|
| 469 | + const struct i915_request_coredump *erq) |
|---|
| 446 | 470 | { |
|---|
| 447 | 471 | if (!erq->seqno) |
|---|
| 448 | 472 | return; |
|---|
| 449 | 473 | |
|---|
| 450 | | - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", |
|---|
| 451 | | - prefix, erq->pid, erq->ban_score, |
|---|
| 452 | | - erq->context, erq->seqno, erq->sched_attr.priority, |
|---|
| 453 | | - jiffies_to_msecs(erq->jiffies - epoch), |
|---|
| 454 | | - erq->start, erq->head, erq->tail); |
|---|
| 474 | + err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, head %08x, tail %08x\n", |
|---|
| 475 | + prefix, erq->pid, erq->context, erq->seqno, |
|---|
| 476 | + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, |
|---|
| 477 | + &erq->flags) ? "!" : "", |
|---|
| 478 | + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, |
|---|
| 479 | + &erq->flags) ? "+" : "", |
|---|
| 480 | + erq->sched_attr.priority, |
|---|
| 481 | + erq->head, erq->tail); |
|---|
| 455 | 482 | } |
|---|
| 456 | 483 | |
|---|
| 457 | 484 | static void error_print_context(struct drm_i915_error_state_buf *m, |
|---|
| 458 | 485 | const char *header, |
|---|
| 459 | | - const struct drm_i915_error_context *ctx) |
|---|
| 486 | + const struct i915_gem_context_coredump *ctx) |
|---|
| 460 | 487 | { |
|---|
| 461 | | - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", |
|---|
| 462 | | - header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, |
|---|
| 463 | | - ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), |
|---|
| 464 | | - ctx->guilty, ctx->active); |
|---|
| 488 | + const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns; |
|---|
| 489 | + |
|---|
| 490 | + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", |
|---|
| 491 | + header, ctx->comm, ctx->pid, ctx->sched_attr.priority, |
|---|
| 492 | + ctx->guilty, ctx->active, |
|---|
| 493 | + ctx->total_runtime * period, |
|---|
| 494 | + mul_u32_u32(ctx->avg_runtime, period)); |
|---|
| 495 | +} |
|---|
| 496 | + |
|---|
| 497 | +static struct i915_vma_coredump * |
|---|
| 498 | +__find_vma(struct i915_vma_coredump *vma, const char *name) |
|---|
| 499 | +{ |
|---|
| 500 | + while (vma) { |
|---|
| 501 | + if (strcmp(vma->name, name) == 0) |
|---|
| 502 | + return vma; |
|---|
| 503 | + vma = vma->next; |
|---|
| 504 | + } |
|---|
| 505 | + |
|---|
| 506 | + return NULL; |
|---|
| 507 | +} |
|---|
| 508 | + |
|---|
| 509 | +static struct i915_vma_coredump * |
|---|
| 510 | +find_batch(const struct intel_engine_coredump *ee) |
|---|
| 511 | +{ |
|---|
| 512 | + return __find_vma(ee->vma, "batch"); |
|---|
| 465 | 513 | } |
|---|
| 466 | 514 | |
|---|
| 467 | 515 | static void error_print_engine(struct drm_i915_error_state_buf *m, |
|---|
| 468 | | - const struct drm_i915_error_engine *ee, |
|---|
| 469 | | - const unsigned long epoch) |
|---|
| 516 | + const struct intel_engine_coredump *ee) |
|---|
| 470 | 517 | { |
|---|
| 518 | + struct i915_vma_coredump *batch; |
|---|
| 471 | 519 | int n; |
|---|
| 472 | 520 | |
|---|
| 473 | | - err_printf(m, "%s command stream:\n", |
|---|
| 474 | | - engine_name(m->i915, ee->engine_id)); |
|---|
| 475 | | - err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); |
|---|
| 521 | + err_printf(m, "%s command stream:\n", ee->engine->name); |
|---|
| 522 | + err_printf(m, " CCID: 0x%08x\n", ee->ccid); |
|---|
| 476 | 523 | err_printf(m, " START: 0x%08x\n", ee->start); |
|---|
| 477 | 524 | err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); |
|---|
| 478 | 525 | err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n", |
|---|
| .. | .. |
|---|
| 484 | 531 | (u32)(ee->acthd>>32), (u32)ee->acthd); |
|---|
| 485 | 532 | err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); |
|---|
| 486 | 533 | err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); |
|---|
| 534 | + err_printf(m, " ESR: 0x%08x\n", ee->esr); |
|---|
| 487 | 535 | |
|---|
| 488 | 536 | error_print_instdone(m, ee); |
|---|
| 489 | 537 | |
|---|
| 490 | | - if (ee->batchbuffer) { |
|---|
| 491 | | - u64 start = ee->batchbuffer->gtt_offset; |
|---|
| 492 | | - u64 end = start + ee->batchbuffer->gtt_size; |
|---|
| 538 | + batch = find_batch(ee); |
|---|
| 539 | + if (batch) { |
|---|
| 540 | + u64 start = batch->gtt_offset; |
|---|
| 541 | + u64 end = start + batch->gtt_size; |
|---|
| 493 | 542 | |
|---|
| 494 | 543 | err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n", |
|---|
| 495 | 544 | upper_32_bits(start), lower_32_bits(start), |
|---|
| .. | .. |
|---|
| 507 | 556 | if (INTEL_GEN(m->i915) >= 6) { |
|---|
| 508 | 557 | err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); |
|---|
| 509 | 558 | err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); |
|---|
| 510 | | - err_printf(m, " SYNC_0: 0x%08x\n", |
|---|
| 511 | | - ee->semaphore_mboxes[0]); |
|---|
| 512 | | - err_printf(m, " SYNC_1: 0x%08x\n", |
|---|
| 513 | | - ee->semaphore_mboxes[1]); |
|---|
| 514 | | - if (HAS_VEBOX(m->i915)) |
|---|
| 515 | | - err_printf(m, " SYNC_2: 0x%08x\n", |
|---|
| 516 | | - ee->semaphore_mboxes[2]); |
|---|
| 517 | 559 | } |
|---|
| 518 | | - if (USES_PPGTT(m->i915)) { |
|---|
| 560 | + if (HAS_PPGTT(m->i915)) { |
|---|
| 519 | 561 | err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); |
|---|
| 520 | 562 | |
|---|
| 521 | 563 | if (INTEL_GEN(m->i915) >= 8) { |
|---|
| .. | .. |
|---|
| 528 | 570 | ee->vm_info.pp_dir_base); |
|---|
| 529 | 571 | } |
|---|
| 530 | 572 | } |
|---|
| 531 | | - err_printf(m, " seqno: 0x%08x\n", ee->seqno); |
|---|
| 532 | | - err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); |
|---|
| 533 | | - err_printf(m, " waiting: %s\n", yesno(ee->waiting)); |
|---|
| 534 | | - err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); |
|---|
| 535 | | - err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); |
|---|
| 536 | | - err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); |
|---|
| 537 | | - err_printf(m, " hangcheck action: %s\n", |
|---|
| 538 | | - hangcheck_action_to_str(ee->hangcheck_action)); |
|---|
| 539 | | - err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", |
|---|
| 540 | | - jiffies_to_msecs(ee->hangcheck_timestamp - epoch), |
|---|
| 541 | | - ee->hangcheck_timestamp, |
|---|
| 542 | | - ee->hangcheck_timestamp == epoch ? "; epoch" : ""); |
|---|
| 543 | 573 | err_printf(m, " engine reset count: %u\n", ee->reset_count); |
|---|
| 544 | 574 | |
|---|
| 545 | 575 | for (n = 0; n < ee->num_ports; n++) { |
|---|
| 546 | 576 | err_printf(m, " ELSP[%d]:", n); |
|---|
| 547 | | - error_print_request(m, " ", &ee->execlist[n], epoch); |
|---|
| 577 | + error_print_request(m, " ", &ee->execlist[n]); |
|---|
| 548 | 578 | } |
|---|
| 549 | 579 | |
|---|
| 550 | 580 | error_print_context(m, " Active context: ", &ee->context); |
|---|
| .. | .. |
|---|
| 559 | 589 | va_end(args); |
|---|
| 560 | 590 | } |
|---|
| 561 | 591 | |
|---|
| 562 | | -static void print_error_obj(struct drm_i915_error_state_buf *m, |
|---|
| 563 | | - struct intel_engine_cs *engine, |
|---|
| 564 | | - const char *name, |
|---|
| 565 | | - struct drm_i915_error_object *obj) |
|---|
| 592 | +static void print_error_vma(struct drm_i915_error_state_buf *m, |
|---|
| 593 | + const struct intel_engine_cs *engine, |
|---|
| 594 | + const struct i915_vma_coredump *vma) |
|---|
| 566 | 595 | { |
|---|
| 567 | 596 | char out[ASCII85_BUFSZ]; |
|---|
| 568 | 597 | int page; |
|---|
| 569 | 598 | |
|---|
| 570 | | - if (!obj) |
|---|
| 599 | + if (!vma) |
|---|
| 571 | 600 | return; |
|---|
| 572 | 601 | |
|---|
| 573 | | - if (name) { |
|---|
| 574 | | - err_printf(m, "%s --- %s = 0x%08x %08x\n", |
|---|
| 575 | | - engine ? engine->name : "global", name, |
|---|
| 576 | | - upper_32_bits(obj->gtt_offset), |
|---|
| 577 | | - lower_32_bits(obj->gtt_offset)); |
|---|
| 578 | | - } |
|---|
| 602 | + err_printf(m, "%s --- %s = 0x%08x %08x\n", |
|---|
| 603 | + engine ? engine->name : "global", vma->name, |
|---|
| 604 | + upper_32_bits(vma->gtt_offset), |
|---|
| 605 | + lower_32_bits(vma->gtt_offset)); |
|---|
| 606 | + |
|---|
| 607 | + if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) |
|---|
| 608 | + err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes); |
|---|
| 579 | 609 | |
|---|
| 580 | 610 | err_compression_marker(m); |
|---|
| 581 | | - for (page = 0; page < obj->page_count; page++) { |
|---|
| 611 | + for (page = 0; page < vma->page_count; page++) { |
|---|
| 582 | 612 | int i, len; |
|---|
| 583 | 613 | |
|---|
| 584 | 614 | len = PAGE_SIZE; |
|---|
| 585 | | - if (page == obj->page_count - 1) |
|---|
| 586 | | - len -= obj->unused; |
|---|
| 615 | + if (page == vma->page_count - 1) |
|---|
| 616 | + len -= vma->unused; |
|---|
| 587 | 617 | len = ascii85_encode_len(len); |
|---|
| 588 | 618 | |
|---|
| 589 | 619 | for (i = 0; i < len; i++) |
|---|
| 590 | | - err_puts(m, ascii85_encode(obj->pages[page][i], out)); |
|---|
| 620 | + err_puts(m, ascii85_encode(vma->pages[page][i], out)); |
|---|
| 591 | 621 | } |
|---|
| 592 | 622 | err_puts(m, "\n"); |
|---|
| 593 | 623 | } |
|---|
| 594 | 624 | |
|---|
| 595 | 625 | static void err_print_capabilities(struct drm_i915_error_state_buf *m, |
|---|
| 596 | | - const struct intel_device_info *info, |
|---|
| 597 | | - const struct intel_driver_caps *caps) |
|---|
| 626 | + struct i915_gpu_coredump *error) |
|---|
| 598 | 627 | { |
|---|
| 599 | 628 | struct drm_printer p = i915_error_printer(m); |
|---|
| 600 | 629 | |
|---|
| 601 | | - intel_device_info_dump_flags(info, &p); |
|---|
| 602 | | - intel_driver_caps_print(caps, &p); |
|---|
| 603 | | - intel_device_info_dump_topology(&info->sseu, &p); |
|---|
| 630 | + intel_device_info_print_static(&error->device_info, &p); |
|---|
| 631 | + intel_device_info_print_runtime(&error->runtime_info, &p); |
|---|
| 632 | + intel_driver_caps_print(&error->driver_caps, &p); |
|---|
| 604 | 633 | } |
|---|
| 605 | 634 | |
|---|
| 606 | 635 | static void err_print_params(struct drm_i915_error_state_buf *m, |
|---|
| .. | .. |
|---|
| 624 | 653 | } |
|---|
| 625 | 654 | |
|---|
| 626 | 655 | static void err_print_uc(struct drm_i915_error_state_buf *m, |
|---|
| 627 | | - const struct i915_error_uc *error_uc) |
|---|
| 656 | + const struct intel_uc_coredump *error_uc) |
|---|
| 628 | 657 | { |
|---|
| 629 | 658 | struct drm_printer p = i915_error_printer(m); |
|---|
| 630 | | - const struct i915_gpu_state *error = |
|---|
| 631 | | - container_of(error_uc, typeof(*error), uc); |
|---|
| 632 | | - |
|---|
| 633 | | - if (!error->device_info.has_guc) |
|---|
| 634 | | - return; |
|---|
| 635 | 659 | |
|---|
| 636 | 660 | intel_uc_fw_dump(&error_uc->guc_fw, &p); |
|---|
| 637 | 661 | intel_uc_fw_dump(&error_uc->huc_fw, &p); |
|---|
| 638 | | - print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); |
|---|
| 662 | + print_error_vma(m, NULL, error_uc->guc_log); |
|---|
| 639 | 663 | } |
|---|
| 640 | 664 | |
|---|
| 641 | | -int i915_error_state_to_str(struct drm_i915_error_state_buf *m, |
|---|
| 642 | | - const struct i915_gpu_state *error) |
|---|
| 665 | +static void err_free_sgl(struct scatterlist *sgl) |
|---|
| 643 | 666 | { |
|---|
| 644 | | - struct drm_i915_private *dev_priv = m->i915; |
|---|
| 645 | | - struct drm_i915_error_object *obj; |
|---|
| 646 | | - struct timespec64 ts; |
|---|
| 647 | | - int i, j; |
|---|
| 667 | + while (sgl) { |
|---|
| 668 | + struct scatterlist *sg; |
|---|
| 648 | 669 | |
|---|
| 649 | | - if (!error) { |
|---|
| 650 | | - err_printf(m, "No error state collected\n"); |
|---|
| 651 | | - return 0; |
|---|
| 670 | + for (sg = sgl; !sg_is_chain(sg); sg++) { |
|---|
| 671 | + kfree(sg_virt(sg)); |
|---|
| 672 | + if (sg_is_last(sg)) |
|---|
| 673 | + break; |
|---|
| 674 | + } |
|---|
| 675 | + |
|---|
| 676 | + sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg); |
|---|
| 677 | + free_page((unsigned long)sgl); |
|---|
| 678 | + sgl = sg; |
|---|
| 652 | 679 | } |
|---|
| 680 | +} |
|---|
| 681 | + |
|---|
| 682 | +static void err_print_gt_info(struct drm_i915_error_state_buf *m, |
|---|
| 683 | + struct intel_gt_coredump *gt) |
|---|
| 684 | +{ |
|---|
| 685 | + struct drm_printer p = i915_error_printer(m); |
|---|
| 686 | + |
|---|
| 687 | + intel_gt_info_print(>->info, &p); |
|---|
| 688 | + intel_sseu_print_topology(>->info.sseu, &p); |
|---|
| 689 | +} |
|---|
| 690 | + |
|---|
| 691 | +static void err_print_gt(struct drm_i915_error_state_buf *m, |
|---|
| 692 | + struct intel_gt_coredump *gt) |
|---|
| 693 | +{ |
|---|
| 694 | + const struct intel_engine_coredump *ee; |
|---|
| 695 | + int i; |
|---|
| 696 | + |
|---|
| 697 | + err_printf(m, "GT awake: %s\n", yesno(gt->awake)); |
|---|
| 698 | + err_printf(m, "EIR: 0x%08x\n", gt->eir); |
|---|
| 699 | + err_printf(m, "IER: 0x%08x\n", gt->ier); |
|---|
| 700 | + for (i = 0; i < gt->ngtier; i++) |
|---|
| 701 | + err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]); |
|---|
| 702 | + err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); |
|---|
| 703 | + err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake); |
|---|
| 704 | + err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); |
|---|
| 705 | + |
|---|
| 706 | + for (i = 0; i < gt->nfence; i++) |
|---|
| 707 | + err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]); |
|---|
| 708 | + |
|---|
| 709 | + if (IS_GEN_RANGE(m->i915, 6, 11)) { |
|---|
| 710 | + err_printf(m, "ERROR: 0x%08x\n", gt->error); |
|---|
| 711 | + err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg); |
|---|
| 712 | + } |
|---|
| 713 | + |
|---|
| 714 | + if (INTEL_GEN(m->i915) >= 8) |
|---|
| 715 | + err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", |
|---|
| 716 | + gt->fault_data1, gt->fault_data0); |
|---|
| 717 | + |
|---|
| 718 | + if (IS_GEN(m->i915, 7)) |
|---|
| 719 | + err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int); |
|---|
| 720 | + |
|---|
| 721 | + if (IS_GEN_RANGE(m->i915, 8, 11)) |
|---|
| 722 | + err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache); |
|---|
| 723 | + |
|---|
| 724 | + if (IS_GEN(m->i915, 12)) |
|---|
| 725 | + err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err); |
|---|
| 726 | + |
|---|
| 727 | + if (INTEL_GEN(m->i915) >= 12) { |
|---|
| 728 | + int i; |
|---|
| 729 | + |
|---|
| 730 | + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { |
|---|
| 731 | + /* |
|---|
| 732 | + * SFC_DONE resides in the VD forcewake domain, so it |
|---|
| 733 | + * only exists if the corresponding VCS engine is |
|---|
| 734 | + * present. |
|---|
| 735 | + */ |
|---|
| 736 | + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) |
|---|
| 737 | + continue; |
|---|
| 738 | + |
|---|
| 739 | + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, |
|---|
| 740 | + gt->sfc_done[i]); |
|---|
| 741 | + } |
|---|
| 742 | + |
|---|
| 743 | + err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); |
|---|
| 744 | + } |
|---|
| 745 | + |
|---|
| 746 | + for (ee = gt->engine; ee; ee = ee->next) { |
|---|
| 747 | + const struct i915_vma_coredump *vma; |
|---|
| 748 | + |
|---|
| 749 | + error_print_engine(m, ee); |
|---|
| 750 | + for (vma = ee->vma; vma; vma = vma->next) |
|---|
| 751 | + print_error_vma(m, ee->engine, vma); |
|---|
| 752 | + } |
|---|
| 753 | + |
|---|
| 754 | + if (gt->uc) |
|---|
| 755 | + err_print_uc(m, gt->uc); |
|---|
| 756 | + |
|---|
| 757 | + err_print_gt_info(m, gt); |
|---|
| 758 | +} |
|---|
| 759 | + |
|---|
| 760 | +static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, |
|---|
| 761 | + struct i915_gpu_coredump *error) |
|---|
| 762 | +{ |
|---|
| 763 | + const struct intel_engine_coredump *ee; |
|---|
| 764 | + struct timespec64 ts; |
|---|
| 653 | 765 | |
|---|
| 654 | 766 | if (*error->error_msg) |
|---|
| 655 | 767 | err_printf(m, "%s\n", error->error_msg); |
|---|
| 656 | | - err_printf(m, "Kernel: " UTS_RELEASE "\n"); |
|---|
| 768 | + err_printf(m, "Kernel: %s %s\n", |
|---|
| 769 | + init_utsname()->release, |
|---|
| 770 | + init_utsname()->machine); |
|---|
| 771 | + err_printf(m, "Driver: %s\n", DRIVER_DATE); |
|---|
| 657 | 772 | ts = ktime_to_timespec64(error->time); |
|---|
| 658 | 773 | err_printf(m, "Time: %lld s %ld us\n", |
|---|
| 659 | 774 | (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); |
|---|
| .. | .. |
|---|
| 663 | 778 | ts = ktime_to_timespec64(error->uptime); |
|---|
| 664 | 779 | err_printf(m, "Uptime: %lld s %ld us\n", |
|---|
| 665 | 780 | (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); |
|---|
| 666 | | - err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); |
|---|
| 667 | | - err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", |
|---|
| 668 | | - error->capture, |
|---|
| 669 | | - jiffies_to_msecs(jiffies - error->capture), |
|---|
| 670 | | - jiffies_to_msecs(error->capture - error->epoch)); |
|---|
| 781 | + err_printf(m, "Capture: %lu jiffies; %d ms ago\n", |
|---|
| 782 | + error->capture, jiffies_to_msecs(jiffies - error->capture)); |
|---|
| 671 | 783 | |
|---|
| 672 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
|---|
| 673 | | - if (error->engine[i].hangcheck_stalled && |
|---|
| 674 | | - error->engine[i].context.pid) { |
|---|
| 675 | | - err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", |
|---|
| 676 | | - engine_name(m->i915, i), |
|---|
| 677 | | - error->engine[i].context.comm, |
|---|
| 678 | | - error->engine[i].context.pid, |
|---|
| 679 | | - error->engine[i].context.ban_score, |
|---|
| 680 | | - bannable(&error->engine[i].context)); |
|---|
| 681 | | - } |
|---|
| 682 | | - } |
|---|
| 784 | + for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next) |
|---|
| 785 | + err_printf(m, "Active process (on ring %s): %s [%d]\n", |
|---|
| 786 | + ee->engine->name, |
|---|
| 787 | + ee->context.comm, |
|---|
| 788 | + ee->context.pid); |
|---|
| 789 | + |
|---|
| 683 | 790 | err_printf(m, "Reset count: %u\n", error->reset_count); |
|---|
| 684 | 791 | err_printf(m, "Suspend count: %u\n", error->suspend_count); |
|---|
| 685 | 792 | err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); |
|---|
| 686 | | - err_print_pciid(m, error->i915); |
|---|
| 793 | + err_printf(m, "Subplatform: 0x%x\n", |
|---|
| 794 | + intel_subplatform(&error->runtime_info, |
|---|
| 795 | + error->device_info.platform)); |
|---|
| 796 | + err_print_pciid(m, m->i915); |
|---|
| 687 | 797 | |
|---|
| 688 | 798 | err_printf(m, "IOMMU enabled?: %d\n", error->iommu); |
|---|
| 689 | 799 | |
|---|
| 690 | | - if (HAS_CSR(dev_priv)) { |
|---|
| 691 | | - struct intel_csr *csr = &dev_priv->csr; |
|---|
| 800 | + if (HAS_CSR(m->i915)) { |
|---|
| 801 | + struct intel_csr *csr = &m->i915->csr; |
|---|
| 692 | 802 | |
|---|
| 693 | 803 | err_printf(m, "DMC loaded: %s\n", |
|---|
| 694 | 804 | yesno(csr->dmc_payload != NULL)); |
|---|
| .. | .. |
|---|
| 697 | 807 | CSR_VERSION_MINOR(csr->version)); |
|---|
| 698 | 808 | } |
|---|
| 699 | 809 | |
|---|
| 700 | | - err_printf(m, "GT awake: %s\n", yesno(error->awake)); |
|---|
| 701 | 810 | err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); |
|---|
| 702 | 811 | err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); |
|---|
| 703 | | - err_printf(m, "EIR: 0x%08x\n", error->eir); |
|---|
| 704 | | - err_printf(m, "IER: 0x%08x\n", error->ier); |
|---|
| 705 | | - for (i = 0; i < error->ngtier; i++) |
|---|
| 706 | | - err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); |
|---|
| 707 | | - err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); |
|---|
| 708 | | - err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); |
|---|
| 709 | | - err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); |
|---|
| 710 | | - err_printf(m, "CCID: 0x%08x\n", error->ccid); |
|---|
| 711 | | - err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); |
|---|
| 712 | 812 | |
|---|
| 713 | | - for (i = 0; i < error->nfence; i++) |
|---|
| 714 | | - err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); |
|---|
| 715 | | - |
|---|
| 716 | | - if (INTEL_GEN(dev_priv) >= 6) { |
|---|
| 717 | | - err_printf(m, "ERROR: 0x%08x\n", error->error); |
|---|
| 718 | | - |
|---|
| 719 | | - if (INTEL_GEN(dev_priv) >= 8) |
|---|
| 720 | | - err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", |
|---|
| 721 | | - error->fault_data1, error->fault_data0); |
|---|
| 722 | | - |
|---|
| 723 | | - err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); |
|---|
| 724 | | - } |
|---|
| 725 | | - |
|---|
| 726 | | - if (IS_GEN7(dev_priv)) |
|---|
| 727 | | - err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); |
|---|
| 728 | | - |
|---|
| 729 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
|---|
| 730 | | - if (error->engine[i].engine_id != -1) |
|---|
| 731 | | - error_print_engine(m, &error->engine[i], error->epoch); |
|---|
| 732 | | - } |
|---|
| 733 | | - |
|---|
| 734 | | - for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { |
|---|
| 735 | | - char buf[128]; |
|---|
| 736 | | - int len, first = 1; |
|---|
| 737 | | - |
|---|
| 738 | | - if (!error->active_vm[i]) |
|---|
| 739 | | - break; |
|---|
| 740 | | - |
|---|
| 741 | | - len = scnprintf(buf, sizeof(buf), "Active ("); |
|---|
| 742 | | - for (j = 0; j < ARRAY_SIZE(error->engine); j++) { |
|---|
| 743 | | - if (error->engine[j].vm != error->active_vm[i]) |
|---|
| 744 | | - continue; |
|---|
| 745 | | - |
|---|
| 746 | | - len += scnprintf(buf + len, sizeof(buf), "%s%s", |
|---|
| 747 | | - first ? "" : ", ", |
|---|
| 748 | | - dev_priv->engine[j]->name); |
|---|
| 749 | | - first = 0; |
|---|
| 750 | | - } |
|---|
| 751 | | - scnprintf(buf + len, sizeof(buf), ")"); |
|---|
| 752 | | - print_error_buffers(m, buf, |
|---|
| 753 | | - error->active_bo[i], |
|---|
| 754 | | - error->active_bo_count[i]); |
|---|
| 755 | | - } |
|---|
| 756 | | - |
|---|
| 757 | | - print_error_buffers(m, "Pinned (global)", |
|---|
| 758 | | - error->pinned_bo, |
|---|
| 759 | | - error->pinned_bo_count); |
|---|
| 760 | | - |
|---|
| 761 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
|---|
| 762 | | - const struct drm_i915_error_engine *ee = &error->engine[i]; |
|---|
| 763 | | - |
|---|
| 764 | | - obj = ee->batchbuffer; |
|---|
| 765 | | - if (obj) { |
|---|
| 766 | | - err_puts(m, dev_priv->engine[i]->name); |
|---|
| 767 | | - if (ee->context.pid) |
|---|
| 768 | | - err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)", |
|---|
| 769 | | - ee->context.comm, |
|---|
| 770 | | - ee->context.pid, |
|---|
| 771 | | - ee->context.handle, |
|---|
| 772 | | - ee->context.hw_id, |
|---|
| 773 | | - ee->context.ban_score, |
|---|
| 774 | | - bannable(&ee->context)); |
|---|
| 775 | | - err_printf(m, " --- gtt_offset = 0x%08x %08x\n", |
|---|
| 776 | | - upper_32_bits(obj->gtt_offset), |
|---|
| 777 | | - lower_32_bits(obj->gtt_offset)); |
|---|
| 778 | | - print_error_obj(m, dev_priv->engine[i], NULL, obj); |
|---|
| 779 | | - } |
|---|
| 780 | | - |
|---|
| 781 | | - for (j = 0; j < ee->user_bo_count; j++) |
|---|
| 782 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 783 | | - "user", ee->user_bo[j]); |
|---|
| 784 | | - |
|---|
| 785 | | - if (ee->num_requests) { |
|---|
| 786 | | - err_printf(m, "%s --- %d requests\n", |
|---|
| 787 | | - dev_priv->engine[i]->name, |
|---|
| 788 | | - ee->num_requests); |
|---|
| 789 | | - for (j = 0; j < ee->num_requests; j++) |
|---|
| 790 | | - error_print_request(m, " ", |
|---|
| 791 | | - &ee->requests[j], |
|---|
| 792 | | - error->epoch); |
|---|
| 793 | | - } |
|---|
| 794 | | - |
|---|
| 795 | | - if (IS_ERR(ee->waiters)) { |
|---|
| 796 | | - err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n", |
|---|
| 797 | | - dev_priv->engine[i]->name); |
|---|
| 798 | | - } else if (ee->num_waiters) { |
|---|
| 799 | | - err_printf(m, "%s --- %d waiters\n", |
|---|
| 800 | | - dev_priv->engine[i]->name, |
|---|
| 801 | | - ee->num_waiters); |
|---|
| 802 | | - for (j = 0; j < ee->num_waiters; j++) { |
|---|
| 803 | | - err_printf(m, " seqno 0x%08x for %s [%d]\n", |
|---|
| 804 | | - ee->waiters[j].seqno, |
|---|
| 805 | | - ee->waiters[j].comm, |
|---|
| 806 | | - ee->waiters[j].pid); |
|---|
| 807 | | - } |
|---|
| 808 | | - } |
|---|
| 809 | | - |
|---|
| 810 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 811 | | - "ringbuffer", ee->ringbuffer); |
|---|
| 812 | | - |
|---|
| 813 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 814 | | - "HW Status", ee->hws_page); |
|---|
| 815 | | - |
|---|
| 816 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 817 | | - "HW context", ee->ctx); |
|---|
| 818 | | - |
|---|
| 819 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 820 | | - "WA context", ee->wa_ctx); |
|---|
| 821 | | - |
|---|
| 822 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 823 | | - "WA batchbuffer", ee->wa_batchbuffer); |
|---|
| 824 | | - |
|---|
| 825 | | - print_error_obj(m, dev_priv->engine[i], |
|---|
| 826 | | - "NULL context", ee->default_state); |
|---|
| 827 | | - } |
|---|
| 813 | + if (error->gt) |
|---|
| 814 | + err_print_gt(m, error->gt); |
|---|
| 828 | 815 | |
|---|
| 829 | 816 | if (error->overlay) |
|---|
| 830 | 817 | intel_overlay_print_error_state(m, error->overlay); |
|---|
| .. | .. |
|---|
| 832 | 819 | if (error->display) |
|---|
| 833 | 820 | intel_display_print_error_state(m, error->display); |
|---|
| 834 | 821 | |
|---|
| 835 | | - err_print_capabilities(m, &error->device_info, &error->driver_caps); |
|---|
| 822 | + err_print_capabilities(m, error); |
|---|
| 836 | 823 | err_print_params(m, &error->params); |
|---|
| 837 | | - err_print_uc(m, &error->uc); |
|---|
| 824 | +} |
|---|
| 838 | 825 | |
|---|
| 839 | | - if (m->bytes == 0 && m->err) |
|---|
| 840 | | - return m->err; |
|---|
| 826 | +static int err_print_to_sgl(struct i915_gpu_coredump *error) |
|---|
| 827 | +{ |
|---|
| 828 | + struct drm_i915_error_state_buf m; |
|---|
| 829 | + |
|---|
| 830 | + if (IS_ERR(error)) |
|---|
| 831 | + return PTR_ERR(error); |
|---|
| 832 | + |
|---|
| 833 | + if (READ_ONCE(error->sgl)) |
|---|
| 834 | + return 0; |
|---|
| 835 | + |
|---|
| 836 | + memset(&m, 0, sizeof(m)); |
|---|
| 837 | + m.i915 = error->i915; |
|---|
| 838 | + |
|---|
| 839 | + __err_print_to_sgl(&m, error); |
|---|
| 840 | + |
|---|
| 841 | + if (m.buf) { |
|---|
| 842 | + __sg_set_buf(m.cur++, m.buf, m.bytes, m.iter); |
|---|
| 843 | + m.bytes = 0; |
|---|
| 844 | + m.buf = NULL; |
|---|
| 845 | + } |
|---|
| 846 | + if (m.cur) { |
|---|
| 847 | + GEM_BUG_ON(m.end < m.cur); |
|---|
| 848 | + sg_mark_end(m.cur - 1); |
|---|
| 849 | + } |
|---|
| 850 | + GEM_BUG_ON(m.sgl && !m.cur); |
|---|
| 851 | + |
|---|
| 852 | + if (m.err) { |
|---|
| 853 | + err_free_sgl(m.sgl); |
|---|
| 854 | + return m.err; |
|---|
| 855 | + } |
|---|
| 856 | + |
|---|
| 857 | + if (cmpxchg(&error->sgl, NULL, m.sgl)) |
|---|
| 858 | + err_free_sgl(m.sgl); |
|---|
| 841 | 859 | |
|---|
| 842 | 860 | return 0; |
|---|
| 843 | 861 | } |
|---|
| 844 | 862 | |
|---|
| 845 | | -int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf, |
|---|
| 846 | | - struct drm_i915_private *i915, |
|---|
| 847 | | - size_t count, loff_t pos) |
|---|
| 863 | +ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, |
|---|
| 864 | + char *buf, loff_t off, size_t rem) |
|---|
| 848 | 865 | { |
|---|
| 849 | | - memset(ebuf, 0, sizeof(*ebuf)); |
|---|
| 850 | | - ebuf->i915 = i915; |
|---|
| 866 | + struct scatterlist *sg; |
|---|
| 867 | + size_t count; |
|---|
| 868 | + loff_t pos; |
|---|
| 869 | + int err; |
|---|
| 851 | 870 | |
|---|
| 852 | | - /* We need to have enough room to store any i915_error_state printf |
|---|
| 853 | | - * so that we can move it to start position. |
|---|
| 854 | | - */ |
|---|
| 855 | | - ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE; |
|---|
| 856 | | - ebuf->buf = kmalloc(ebuf->size, |
|---|
| 857 | | - GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); |
|---|
| 871 | + if (!error || !rem) |
|---|
| 872 | + return 0; |
|---|
| 858 | 873 | |
|---|
| 859 | | - if (ebuf->buf == NULL) { |
|---|
| 860 | | - ebuf->size = PAGE_SIZE; |
|---|
| 861 | | - ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL); |
|---|
| 874 | + err = err_print_to_sgl(error); |
|---|
| 875 | + if (err) |
|---|
| 876 | + return err; |
|---|
| 877 | + |
|---|
| 878 | + sg = READ_ONCE(error->fit); |
|---|
| 879 | + if (!sg || off < sg->dma_address) |
|---|
| 880 | + sg = error->sgl; |
|---|
| 881 | + if (!sg) |
|---|
| 882 | + return 0; |
|---|
| 883 | + |
|---|
| 884 | + pos = sg->dma_address; |
|---|
| 885 | + count = 0; |
|---|
| 886 | + do { |
|---|
| 887 | + size_t len, start; |
|---|
| 888 | + |
|---|
| 889 | + if (sg_is_chain(sg)) { |
|---|
| 890 | + sg = sg_chain_ptr(sg); |
|---|
| 891 | + GEM_BUG_ON(sg_is_chain(sg)); |
|---|
| 892 | + } |
|---|
| 893 | + |
|---|
| 894 | + len = sg->length; |
|---|
| 895 | + if (pos + len <= off) { |
|---|
| 896 | + pos += len; |
|---|
| 897 | + continue; |
|---|
| 898 | + } |
|---|
| 899 | + |
|---|
| 900 | + start = sg->offset; |
|---|
| 901 | + if (pos < off) { |
|---|
| 902 | + GEM_BUG_ON(off - pos > len); |
|---|
| 903 | + len -= off - pos; |
|---|
| 904 | + start += off - pos; |
|---|
| 905 | + pos = off; |
|---|
| 906 | + } |
|---|
| 907 | + |
|---|
| 908 | + len = min(len, rem); |
|---|
| 909 | + GEM_BUG_ON(!len || len > sg->length); |
|---|
| 910 | + |
|---|
| 911 | + memcpy(buf, page_address(sg_page(sg)) + start, len); |
|---|
| 912 | + |
|---|
| 913 | + count += len; |
|---|
| 914 | + pos += len; |
|---|
| 915 | + |
|---|
| 916 | + buf += len; |
|---|
| 917 | + rem -= len; |
|---|
| 918 | + if (!rem) { |
|---|
| 919 | + WRITE_ONCE(error->fit, sg); |
|---|
| 920 | + break; |
|---|
| 921 | + } |
|---|
| 922 | + } while (!sg_is_last(sg++)); |
|---|
| 923 | + |
|---|
| 924 | + return count; |
|---|
| 925 | +} |
|---|
| 926 | + |
|---|
| 927 | +static void i915_vma_coredump_free(struct i915_vma_coredump *vma) |
|---|
| 928 | +{ |
|---|
| 929 | + while (vma) { |
|---|
| 930 | + struct i915_vma_coredump *next = vma->next; |
|---|
| 931 | + int page; |
|---|
| 932 | + |
|---|
| 933 | + for (page = 0; page < vma->page_count; page++) |
|---|
| 934 | + free_page((unsigned long)vma->pages[page]); |
|---|
| 935 | + |
|---|
| 936 | + kfree(vma); |
|---|
| 937 | + vma = next; |
|---|
| 938 | + } |
|---|
| 939 | +} |
|---|
| 940 | + |
|---|
| 941 | +static void cleanup_params(struct i915_gpu_coredump *error) |
|---|
| 942 | +{ |
|---|
| 943 | + i915_params_free(&error->params); |
|---|
| 944 | +} |
|---|
| 945 | + |
|---|
| 946 | +static void cleanup_uc(struct intel_uc_coredump *uc) |
|---|
| 947 | +{ |
|---|
| 948 | + kfree(uc->guc_fw.path); |
|---|
| 949 | + kfree(uc->huc_fw.path); |
|---|
| 950 | + i915_vma_coredump_free(uc->guc_log); |
|---|
| 951 | + |
|---|
| 952 | + kfree(uc); |
|---|
| 953 | +} |
|---|
| 954 | + |
|---|
| 955 | +static void cleanup_gt(struct intel_gt_coredump *gt) |
|---|
| 956 | +{ |
|---|
| 957 | + while (gt->engine) { |
|---|
| 958 | + struct intel_engine_coredump *ee = gt->engine; |
|---|
| 959 | + |
|---|
| 960 | + gt->engine = ee->next; |
|---|
| 961 | + |
|---|
| 962 | + i915_vma_coredump_free(ee->vma); |
|---|
| 963 | + kfree(ee); |
|---|
| 862 | 964 | } |
|---|
| 863 | 965 | |
|---|
| 864 | | - if (ebuf->buf == NULL) { |
|---|
| 865 | | - ebuf->size = 128; |
|---|
| 866 | | - ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL); |
|---|
| 867 | | - } |
|---|
| 966 | + if (gt->uc) |
|---|
| 967 | + cleanup_uc(gt->uc); |
|---|
| 868 | 968 | |
|---|
| 869 | | - if (ebuf->buf == NULL) |
|---|
| 870 | | - return -ENOMEM; |
|---|
| 871 | | - |
|---|
| 872 | | - ebuf->start = pos; |
|---|
| 873 | | - |
|---|
| 874 | | - return 0; |
|---|
| 969 | + kfree(gt); |
|---|
| 875 | 970 | } |
|---|
| 876 | 971 | |
|---|
| 877 | | -static void i915_error_object_free(struct drm_i915_error_object *obj) |
|---|
| 972 | +void __i915_gpu_coredump_free(struct kref *error_ref) |
|---|
| 878 | 973 | { |
|---|
| 879 | | - int page; |
|---|
| 880 | | - |
|---|
| 881 | | - if (obj == NULL) |
|---|
| 882 | | - return; |
|---|
| 883 | | - |
|---|
| 884 | | - for (page = 0; page < obj->page_count; page++) |
|---|
| 885 | | - free_page((unsigned long)obj->pages[page]); |
|---|
| 886 | | - |
|---|
| 887 | | - kfree(obj); |
|---|
| 888 | | -} |
|---|
| 889 | | - |
|---|
| 890 | | -static __always_inline void free_param(const char *type, void *x) |
|---|
| 891 | | -{ |
|---|
| 892 | | - if (!__builtin_strcmp(type, "char *")) |
|---|
| 893 | | - kfree(*(void **)x); |
|---|
| 894 | | -} |
|---|
| 895 | | - |
|---|
| 896 | | -static void cleanup_params(struct i915_gpu_state *error) |
|---|
| 897 | | -{ |
|---|
| 898 | | -#define FREE(T, x, ...) free_param(#T, &error->params.x); |
|---|
| 899 | | - I915_PARAMS_FOR_EACH(FREE); |
|---|
| 900 | | -#undef FREE |
|---|
| 901 | | -} |
|---|
| 902 | | - |
|---|
| 903 | | -static void cleanup_uc_state(struct i915_gpu_state *error) |
|---|
| 904 | | -{ |
|---|
| 905 | | - struct i915_error_uc *error_uc = &error->uc; |
|---|
| 906 | | - |
|---|
| 907 | | - kfree(error_uc->guc_fw.path); |
|---|
| 908 | | - kfree(error_uc->huc_fw.path); |
|---|
| 909 | | - i915_error_object_free(error_uc->guc_log); |
|---|
| 910 | | -} |
|---|
| 911 | | - |
|---|
| 912 | | -void __i915_gpu_state_free(struct kref *error_ref) |
|---|
| 913 | | -{ |
|---|
| 914 | | - struct i915_gpu_state *error = |
|---|
| 974 | + struct i915_gpu_coredump *error = |
|---|
| 915 | 975 | container_of(error_ref, typeof(*error), ref); |
|---|
| 916 | | - long i, j; |
|---|
| 917 | 976 | |
|---|
| 918 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
|---|
| 919 | | - struct drm_i915_error_engine *ee = &error->engine[i]; |
|---|
| 977 | + while (error->gt) { |
|---|
| 978 | + struct intel_gt_coredump *gt = error->gt; |
|---|
| 920 | 979 | |
|---|
| 921 | | - for (j = 0; j < ee->user_bo_count; j++) |
|---|
| 922 | | - i915_error_object_free(ee->user_bo[j]); |
|---|
| 923 | | - kfree(ee->user_bo); |
|---|
| 924 | | - |
|---|
| 925 | | - i915_error_object_free(ee->batchbuffer); |
|---|
| 926 | | - i915_error_object_free(ee->wa_batchbuffer); |
|---|
| 927 | | - i915_error_object_free(ee->ringbuffer); |
|---|
| 928 | | - i915_error_object_free(ee->hws_page); |
|---|
| 929 | | - i915_error_object_free(ee->ctx); |
|---|
| 930 | | - i915_error_object_free(ee->wa_ctx); |
|---|
| 931 | | - |
|---|
| 932 | | - kfree(ee->requests); |
|---|
| 933 | | - if (!IS_ERR_OR_NULL(ee->waiters)) |
|---|
| 934 | | - kfree(ee->waiters); |
|---|
| 980 | + error->gt = gt->next; |
|---|
| 981 | + cleanup_gt(gt); |
|---|
| 935 | 982 | } |
|---|
| 936 | | - |
|---|
| 937 | | - for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) |
|---|
| 938 | | - kfree(error->active_bo[i]); |
|---|
| 939 | | - kfree(error->pinned_bo); |
|---|
| 940 | 983 | |
|---|
| 941 | 984 | kfree(error->overlay); |
|---|
| 942 | 985 | kfree(error->display); |
|---|
| 943 | 986 | |
|---|
| 944 | 987 | cleanup_params(error); |
|---|
| 945 | | - cleanup_uc_state(error); |
|---|
| 946 | 988 | |
|---|
| 989 | + err_free_sgl(error->sgl); |
|---|
| 947 | 990 | kfree(error); |
|---|
| 948 | 991 | } |
|---|
| 949 | 992 | |
|---|
| 950 | | -static struct drm_i915_error_object * |
|---|
| 951 | | -i915_error_object_create(struct drm_i915_private *i915, |
|---|
| 952 | | - struct i915_vma *vma) |
|---|
| 993 | +static struct i915_vma_coredump * |
|---|
| 994 | +i915_vma_coredump_create(const struct intel_gt *gt, |
|---|
| 995 | + const struct i915_vma *vma, |
|---|
| 996 | + const char *name, |
|---|
| 997 | + struct i915_vma_compress *compress) |
|---|
| 953 | 998 | { |
|---|
| 954 | | - struct i915_ggtt *ggtt = &i915->ggtt; |
|---|
| 999 | + struct i915_ggtt *ggtt = gt->ggtt; |
|---|
| 955 | 1000 | const u64 slot = ggtt->error_capture.start; |
|---|
| 956 | | - struct drm_i915_error_object *dst; |
|---|
| 957 | | - struct compress compress; |
|---|
| 1001 | + struct i915_vma_coredump *dst; |
|---|
| 958 | 1002 | unsigned long num_pages; |
|---|
| 959 | 1003 | struct sgt_iter iter; |
|---|
| 960 | | - dma_addr_t dma; |
|---|
| 961 | 1004 | int ret; |
|---|
| 962 | 1005 | |
|---|
| 963 | | - if (!vma) |
|---|
| 1006 | + might_sleep(); |
|---|
| 1007 | + |
|---|
| 1008 | + if (!vma || !vma->pages || !compress) |
|---|
| 964 | 1009 | return NULL; |
|---|
| 965 | 1010 | |
|---|
| 966 | 1011 | num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; |
|---|
| 967 | 1012 | num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */ |
|---|
| 968 | | - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), |
|---|
| 969 | | - GFP_ATOMIC | __GFP_NOWARN); |
|---|
| 1013 | + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL); |
|---|
| 970 | 1014 | if (!dst) |
|---|
| 971 | 1015 | return NULL; |
|---|
| 972 | 1016 | |
|---|
| 1017 | + if (!compress_start(compress)) { |
|---|
| 1018 | + kfree(dst); |
|---|
| 1019 | + return NULL; |
|---|
| 1020 | + } |
|---|
| 1021 | + |
|---|
| 1022 | + strcpy(dst->name, name); |
|---|
| 1023 | + dst->next = NULL; |
|---|
| 1024 | + |
|---|
| 973 | 1025 | dst->gtt_offset = vma->node.start; |
|---|
| 974 | 1026 | dst->gtt_size = vma->node.size; |
|---|
| 1027 | + dst->gtt_page_sizes = vma->page_sizes.gtt; |
|---|
| 975 | 1028 | dst->num_pages = num_pages; |
|---|
| 976 | 1029 | dst->page_count = 0; |
|---|
| 977 | 1030 | dst->unused = 0; |
|---|
| 978 | 1031 | |
|---|
| 979 | | - if (!compress_init(&compress)) { |
|---|
| 980 | | - kfree(dst); |
|---|
| 981 | | - return NULL; |
|---|
| 982 | | - } |
|---|
| 983 | | - |
|---|
| 984 | 1032 | ret = -EINVAL; |
|---|
| 985 | | - for_each_sgt_dma(dma, iter, vma->pages) { |
|---|
| 1033 | + if (drm_mm_node_allocated(&ggtt->error_capture)) { |
|---|
| 986 | 1034 | void __iomem *s; |
|---|
| 1035 | + dma_addr_t dma; |
|---|
| 987 | 1036 | |
|---|
| 988 | | - ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); |
|---|
| 1037 | + for_each_sgt_daddr(dma, iter, vma->pages) { |
|---|
| 1038 | + ggtt->vm.insert_page(&ggtt->vm, dma, slot, |
|---|
| 1039 | + I915_CACHE_NONE, 0); |
|---|
| 1040 | + mb(); |
|---|
| 989 | 1041 | |
|---|
| 990 | | - s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); |
|---|
| 991 | | - ret = compress_page(&compress, (void __force *)s, dst); |
|---|
| 992 | | - io_mapping_unmap_atomic(s); |
|---|
| 993 | | - if (ret) |
|---|
| 994 | | - break; |
|---|
| 1042 | + s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); |
|---|
| 1043 | + ret = compress_page(compress, |
|---|
| 1044 | + (void __force *)s, dst, |
|---|
| 1045 | + true); |
|---|
| 1046 | + io_mapping_unmap(s); |
|---|
| 1047 | + if (ret) |
|---|
| 1048 | + break; |
|---|
| 1049 | + } |
|---|
| 1050 | + } else if (i915_gem_object_is_lmem(vma->obj)) { |
|---|
| 1051 | + struct intel_memory_region *mem = vma->obj->mm.region; |
|---|
| 1052 | + dma_addr_t dma; |
|---|
| 1053 | + |
|---|
| 1054 | + for_each_sgt_daddr(dma, iter, vma->pages) { |
|---|
| 1055 | + void __iomem *s; |
|---|
| 1056 | + |
|---|
| 1057 | + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); |
|---|
| 1058 | + ret = compress_page(compress, |
|---|
| 1059 | + (void __force *)s, dst, |
|---|
| 1060 | + true); |
|---|
| 1061 | + io_mapping_unmap(s); |
|---|
| 1062 | + if (ret) |
|---|
| 1063 | + break; |
|---|
| 1064 | + } |
|---|
| 1065 | + } else { |
|---|
| 1066 | + struct page *page; |
|---|
| 1067 | + |
|---|
| 1068 | + for_each_sgt_page(page, iter, vma->pages) { |
|---|
| 1069 | + void *s; |
|---|
| 1070 | + |
|---|
| 1071 | + drm_clflush_pages(&page, 1); |
|---|
| 1072 | + |
|---|
| 1073 | + s = kmap(page); |
|---|
| 1074 | + ret = compress_page(compress, s, dst, false); |
|---|
| 1075 | + kunmap(page); |
|---|
| 1076 | + |
|---|
| 1077 | + drm_clflush_pages(&page, 1); |
|---|
| 1078 | + |
|---|
| 1079 | + if (ret) |
|---|
| 1080 | + break; |
|---|
| 1081 | + } |
|---|
| 995 | 1082 | } |
|---|
| 996 | 1083 | |
|---|
| 997 | | - if (ret || compress_flush(&compress, dst)) { |
|---|
| 1084 | + if (ret || compress_flush(compress, dst)) { |
|---|
| 998 | 1085 | while (dst->page_count--) |
|---|
| 999 | | - free_page((unsigned long)dst->pages[dst->page_count]); |
|---|
| 1086 | + pool_free(&compress->pool, dst->pages[dst->page_count]); |
|---|
| 1000 | 1087 | kfree(dst); |
|---|
| 1001 | 1088 | dst = NULL; |
|---|
| 1002 | 1089 | } |
|---|
| 1090 | + compress_finish(compress); |
|---|
| 1003 | 1091 | |
|---|
| 1004 | | - compress_fini(&compress, dst); |
|---|
| 1005 | | - ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); |
|---|
| 1006 | 1092 | return dst; |
|---|
| 1007 | 1093 | } |
|---|
| 1008 | 1094 | |
|---|
| 1009 | | -/* The error capture is special as tries to run underneath the normal |
|---|
| 1010 | | - * locking rules - so we use the raw version of the i915_gem_active lookup. |
|---|
| 1011 | | - */ |
|---|
| 1012 | | -static inline uint32_t |
|---|
| 1013 | | -__active_get_seqno(struct i915_gem_active *active) |
|---|
| 1095 | +static void gt_record_fences(struct intel_gt_coredump *gt) |
|---|
| 1014 | 1096 | { |
|---|
| 1015 | | - struct i915_request *request; |
|---|
| 1016 | | - |
|---|
| 1017 | | - request = __i915_gem_active_peek(active); |
|---|
| 1018 | | - return request ? request->global_seqno : 0; |
|---|
| 1019 | | -} |
|---|
| 1020 | | - |
|---|
| 1021 | | -static inline int |
|---|
| 1022 | | -__active_get_engine_id(struct i915_gem_active *active) |
|---|
| 1023 | | -{ |
|---|
| 1024 | | - struct i915_request *request; |
|---|
| 1025 | | - |
|---|
| 1026 | | - request = __i915_gem_active_peek(active); |
|---|
| 1027 | | - return request ? request->engine->id : -1; |
|---|
| 1028 | | -} |
|---|
| 1029 | | - |
|---|
| 1030 | | -static void capture_bo(struct drm_i915_error_buffer *err, |
|---|
| 1031 | | - struct i915_vma *vma) |
|---|
| 1032 | | -{ |
|---|
| 1033 | | - struct drm_i915_gem_object *obj = vma->obj; |
|---|
| 1034 | | - |
|---|
| 1035 | | - err->size = obj->base.size; |
|---|
| 1036 | | - err->name = obj->base.name; |
|---|
| 1037 | | - |
|---|
| 1038 | | - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); |
|---|
| 1039 | | - err->engine = __active_get_engine_id(&obj->frontbuffer_write); |
|---|
| 1040 | | - |
|---|
| 1041 | | - err->gtt_offset = vma->node.start; |
|---|
| 1042 | | - err->read_domains = obj->read_domains; |
|---|
| 1043 | | - err->write_domain = obj->write_domain; |
|---|
| 1044 | | - err->fence_reg = vma->fence ? vma->fence->id : -1; |
|---|
| 1045 | | - err->tiling = i915_gem_object_get_tiling(obj); |
|---|
| 1046 | | - err->dirty = obj->mm.dirty; |
|---|
| 1047 | | - err->purgeable = obj->mm.madv != I915_MADV_WILLNEED; |
|---|
| 1048 | | - err->userptr = obj->userptr.mm != NULL; |
|---|
| 1049 | | - err->cache_level = obj->cache_level; |
|---|
| 1050 | | -} |
|---|
| 1051 | | - |
|---|
| 1052 | | -static u32 capture_error_bo(struct drm_i915_error_buffer *err, |
|---|
| 1053 | | - int count, struct list_head *head, |
|---|
| 1054 | | - bool pinned_only) |
|---|
| 1055 | | -{ |
|---|
| 1056 | | - struct i915_vma *vma; |
|---|
| 1057 | | - int i = 0; |
|---|
| 1058 | | - |
|---|
| 1059 | | - list_for_each_entry(vma, head, vm_link) { |
|---|
| 1060 | | - if (!vma->obj) |
|---|
| 1061 | | - continue; |
|---|
| 1062 | | - |
|---|
| 1063 | | - if (pinned_only && !i915_vma_is_pinned(vma)) |
|---|
| 1064 | | - continue; |
|---|
| 1065 | | - |
|---|
| 1066 | | - capture_bo(err++, vma); |
|---|
| 1067 | | - if (++i == count) |
|---|
| 1068 | | - break; |
|---|
| 1069 | | - } |
|---|
| 1070 | | - |
|---|
| 1071 | | - return i; |
|---|
| 1072 | | -} |
|---|
| 1073 | | - |
|---|
| 1074 | | -/* Generate a semi-unique error code. The code is not meant to have meaning, The |
|---|
| 1075 | | - * code's only purpose is to try to prevent false duplicated bug reports by |
|---|
| 1076 | | - * grossly estimating a GPU error state. |
|---|
| 1077 | | - * |
|---|
| 1078 | | - * TODO Ideally, hashing the batchbuffer would be a very nice way to determine |
|---|
| 1079 | | - * the hang if we could strip the GTT offset information from it. |
|---|
| 1080 | | - * |
|---|
| 1081 | | - * It's only a small step better than a random number in its current form. |
|---|
| 1082 | | - */ |
|---|
| 1083 | | -static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, |
|---|
| 1084 | | - struct i915_gpu_state *error, |
|---|
| 1085 | | - int *engine_id) |
|---|
| 1086 | | -{ |
|---|
| 1087 | | - uint32_t error_code = 0; |
|---|
| 1097 | + struct i915_ggtt *ggtt = gt->_gt->ggtt; |
|---|
| 1098 | + struct intel_uncore *uncore = gt->_gt->uncore; |
|---|
| 1088 | 1099 | int i; |
|---|
| 1089 | 1100 | |
|---|
| 1090 | | - /* IPEHR would be an ideal way to detect errors, as it's the gross |
|---|
| 1091 | | - * measure of "the command that hung." However, has some very common |
|---|
| 1092 | | - * synchronization commands which almost always appear in the case |
|---|
| 1093 | | - * strictly a client bug. Use instdone to differentiate those some. |
|---|
| 1094 | | - */ |
|---|
| 1095 | | - for (i = 0; i < I915_NUM_ENGINES; i++) { |
|---|
| 1096 | | - if (error->engine[i].hangcheck_stalled) { |
|---|
| 1097 | | - if (engine_id) |
|---|
| 1098 | | - *engine_id = i; |
|---|
| 1099 | | - |
|---|
| 1100 | | - return error->engine[i].ipehr ^ |
|---|
| 1101 | | - error->engine[i].instdone.instdone; |
|---|
| 1102 | | - } |
|---|
| 1103 | | - } |
|---|
| 1104 | | - |
|---|
| 1105 | | - return error_code; |
|---|
| 1106 | | -} |
|---|
| 1107 | | - |
|---|
| 1108 | | -static void gem_record_fences(struct i915_gpu_state *error) |
|---|
| 1109 | | -{ |
|---|
| 1110 | | - struct drm_i915_private *dev_priv = error->i915; |
|---|
| 1111 | | - int i; |
|---|
| 1112 | | - |
|---|
| 1113 | | - if (INTEL_GEN(dev_priv) >= 6) { |
|---|
| 1114 | | - for (i = 0; i < dev_priv->num_fence_regs; i++) |
|---|
| 1115 | | - error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); |
|---|
| 1116 | | - } else if (INTEL_GEN(dev_priv) >= 4) { |
|---|
| 1117 | | - for (i = 0; i < dev_priv->num_fence_regs; i++) |
|---|
| 1118 | | - error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); |
|---|
| 1101 | + if (INTEL_GEN(uncore->i915) >= 6) { |
|---|
| 1102 | + for (i = 0; i < ggtt->num_fences; i++) |
|---|
| 1103 | + gt->fence[i] = |
|---|
| 1104 | + intel_uncore_read64(uncore, |
|---|
| 1105 | + FENCE_REG_GEN6_LO(i)); |
|---|
| 1106 | + } else if (INTEL_GEN(uncore->i915) >= 4) { |
|---|
| 1107 | + for (i = 0; i < ggtt->num_fences; i++) |
|---|
| 1108 | + gt->fence[i] = |
|---|
| 1109 | + intel_uncore_read64(uncore, |
|---|
| 1110 | + FENCE_REG_965_LO(i)); |
|---|
| 1119 | 1111 | } else { |
|---|
| 1120 | | - for (i = 0; i < dev_priv->num_fence_regs; i++) |
|---|
| 1121 | | - error->fence[i] = I915_READ(FENCE_REG(i)); |
|---|
| 1112 | + for (i = 0; i < ggtt->num_fences; i++) |
|---|
| 1113 | + gt->fence[i] = |
|---|
| 1114 | + intel_uncore_read(uncore, FENCE_REG(i)); |
|---|
| 1122 | 1115 | } |
|---|
| 1123 | | - error->nfence = i; |
|---|
| 1116 | + gt->nfence = i; |
|---|
| 1124 | 1117 | } |
|---|
| 1125 | 1118 | |
|---|
| 1126 | | -static void gen6_record_semaphore_state(struct intel_engine_cs *engine, |
|---|
| 1127 | | - struct drm_i915_error_engine *ee) |
|---|
| 1119 | +static void engine_record_registers(struct intel_engine_coredump *ee) |
|---|
| 1128 | 1120 | { |
|---|
| 1129 | | - struct drm_i915_private *dev_priv = engine->i915; |
|---|
| 1121 | + const struct intel_engine_cs *engine = ee->engine; |
|---|
| 1122 | + struct drm_i915_private *i915 = engine->i915; |
|---|
| 1130 | 1123 | |
|---|
| 1131 | | - ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); |
|---|
| 1132 | | - ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); |
|---|
| 1133 | | - if (HAS_VEBOX(dev_priv)) |
|---|
| 1134 | | - ee->semaphore_mboxes[2] = |
|---|
| 1135 | | - I915_READ(RING_SYNC_2(engine->mmio_base)); |
|---|
| 1136 | | -} |
|---|
| 1124 | + if (INTEL_GEN(i915) >= 6) { |
|---|
| 1125 | + ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL); |
|---|
| 1137 | 1126 | |
|---|
| 1138 | | -static void error_record_engine_waiters(struct intel_engine_cs *engine, |
|---|
| 1139 | | - struct drm_i915_error_engine *ee) |
|---|
| 1140 | | -{ |
|---|
| 1141 | | - struct intel_breadcrumbs *b = &engine->breadcrumbs; |
|---|
| 1142 | | - struct drm_i915_error_waiter *waiter; |
|---|
| 1143 | | - struct rb_node *rb; |
|---|
| 1144 | | - int count; |
|---|
| 1145 | | - |
|---|
| 1146 | | - ee->num_waiters = 0; |
|---|
| 1147 | | - ee->waiters = NULL; |
|---|
| 1148 | | - |
|---|
| 1149 | | - if (RB_EMPTY_ROOT(&b->waiters)) |
|---|
| 1150 | | - return; |
|---|
| 1151 | | - |
|---|
| 1152 | | - if (!spin_trylock_irq(&b->rb_lock)) { |
|---|
| 1153 | | - ee->waiters = ERR_PTR(-EDEADLK); |
|---|
| 1154 | | - return; |
|---|
| 1127 | + if (INTEL_GEN(i915) >= 12) |
|---|
| 1128 | + ee->fault_reg = intel_uncore_read(engine->uncore, |
|---|
| 1129 | + GEN12_RING_FAULT_REG); |
|---|
| 1130 | + else if (INTEL_GEN(i915) >= 8) |
|---|
| 1131 | + ee->fault_reg = intel_uncore_read(engine->uncore, |
|---|
| 1132 | + GEN8_RING_FAULT_REG); |
|---|
| 1133 | + else |
|---|
| 1134 | + ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine); |
|---|
| 1155 | 1135 | } |
|---|
| 1156 | 1136 | |
|---|
| 1157 | | - count = 0; |
|---|
| 1158 | | - for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) |
|---|
| 1159 | | - count++; |
|---|
| 1160 | | - spin_unlock_irq(&b->rb_lock); |
|---|
| 1161 | | - |
|---|
| 1162 | | - waiter = NULL; |
|---|
| 1163 | | - if (count) |
|---|
| 1164 | | - waiter = kmalloc_array(count, |
|---|
| 1165 | | - sizeof(struct drm_i915_error_waiter), |
|---|
| 1166 | | - GFP_ATOMIC); |
|---|
| 1167 | | - if (!waiter) |
|---|
| 1168 | | - return; |
|---|
| 1169 | | - |
|---|
| 1170 | | - if (!spin_trylock_irq(&b->rb_lock)) { |
|---|
| 1171 | | - kfree(waiter); |
|---|
| 1172 | | - ee->waiters = ERR_PTR(-EDEADLK); |
|---|
| 1173 | | - return; |
|---|
| 1174 | | - } |
|---|
| 1175 | | - |
|---|
| 1176 | | - ee->waiters = waiter; |
|---|
| 1177 | | - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { |
|---|
| 1178 | | - struct intel_wait *w = rb_entry(rb, typeof(*w), node); |
|---|
| 1179 | | - |
|---|
| 1180 | | - strcpy(waiter->comm, w->tsk->comm); |
|---|
| 1181 | | - waiter->pid = w->tsk->pid; |
|---|
| 1182 | | - waiter->seqno = w->seqno; |
|---|
| 1183 | | - waiter++; |
|---|
| 1184 | | - |
|---|
| 1185 | | - if (++ee->num_waiters == count) |
|---|
| 1186 | | - break; |
|---|
| 1187 | | - } |
|---|
| 1188 | | - spin_unlock_irq(&b->rb_lock); |
|---|
| 1189 | | -} |
|---|
| 1190 | | - |
|---|
| 1191 | | -static void error_record_engine_registers(struct i915_gpu_state *error, |
|---|
| 1192 | | - struct intel_engine_cs *engine, |
|---|
| 1193 | | - struct drm_i915_error_engine *ee) |
|---|
| 1194 | | -{ |
|---|
| 1195 | | - struct drm_i915_private *dev_priv = engine->i915; |
|---|
| 1196 | | - |
|---|
| 1197 | | - if (INTEL_GEN(dev_priv) >= 6) { |
|---|
| 1198 | | - ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); |
|---|
| 1199 | | - if (INTEL_GEN(dev_priv) >= 8) { |
|---|
| 1200 | | - ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); |
|---|
| 1201 | | - } else { |
|---|
| 1202 | | - gen6_record_semaphore_state(engine, ee); |
|---|
| 1203 | | - ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); |
|---|
| 1137 | + if (INTEL_GEN(i915) >= 4) { |
|---|
| 1138 | + ee->esr = ENGINE_READ(engine, RING_ESR); |
|---|
| 1139 | + ee->faddr = ENGINE_READ(engine, RING_DMA_FADD); |
|---|
| 1140 | + ee->ipeir = ENGINE_READ(engine, RING_IPEIR); |
|---|
| 1141 | + ee->ipehr = ENGINE_READ(engine, RING_IPEHR); |
|---|
| 1142 | + ee->instps = ENGINE_READ(engine, RING_INSTPS); |
|---|
| 1143 | + ee->bbaddr = ENGINE_READ(engine, RING_BBADDR); |
|---|
| 1144 | + ee->ccid = ENGINE_READ(engine, CCID); |
|---|
| 1145 | + if (INTEL_GEN(i915) >= 8) { |
|---|
| 1146 | + ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32; |
|---|
| 1147 | + ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32; |
|---|
| 1204 | 1148 | } |
|---|
| 1205 | | - } |
|---|
| 1206 | | - |
|---|
| 1207 | | - if (INTEL_GEN(dev_priv) >= 4) { |
|---|
| 1208 | | - ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); |
|---|
| 1209 | | - ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); |
|---|
| 1210 | | - ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); |
|---|
| 1211 | | - ee->instps = I915_READ(RING_INSTPS(engine->mmio_base)); |
|---|
| 1212 | | - ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); |
|---|
| 1213 | | - if (INTEL_GEN(dev_priv) >= 8) { |
|---|
| 1214 | | - ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; |
|---|
| 1215 | | - ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; |
|---|
| 1216 | | - } |
|---|
| 1217 | | - ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); |
|---|
| 1149 | + ee->bbstate = ENGINE_READ(engine, RING_BBSTATE); |
|---|
| 1218 | 1150 | } else { |
|---|
| 1219 | | - ee->faddr = I915_READ(DMA_FADD_I8XX); |
|---|
| 1220 | | - ee->ipeir = I915_READ(IPEIR); |
|---|
| 1221 | | - ee->ipehr = I915_READ(IPEHR); |
|---|
| 1151 | + ee->faddr = ENGINE_READ(engine, DMA_FADD_I8XX); |
|---|
| 1152 | + ee->ipeir = ENGINE_READ(engine, IPEIR); |
|---|
| 1153 | + ee->ipehr = ENGINE_READ(engine, IPEHR); |
|---|
| 1222 | 1154 | } |
|---|
| 1223 | 1155 | |
|---|
| 1224 | 1156 | intel_engine_get_instdone(engine, &ee->instdone); |
|---|
| 1225 | 1157 | |
|---|
| 1226 | | - ee->waiting = intel_engine_has_waiter(engine); |
|---|
| 1227 | | - ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); |
|---|
| 1158 | + ee->instpm = ENGINE_READ(engine, RING_INSTPM); |
|---|
| 1228 | 1159 | ee->acthd = intel_engine_get_active_head(engine); |
|---|
| 1229 | | - ee->seqno = intel_engine_get_seqno(engine); |
|---|
| 1230 | | - ee->last_seqno = intel_engine_last_submit(engine); |
|---|
| 1231 | | - ee->start = I915_READ_START(engine); |
|---|
| 1232 | | - ee->head = I915_READ_HEAD(engine); |
|---|
| 1233 | | - ee->tail = I915_READ_TAIL(engine); |
|---|
| 1234 | | - ee->ctl = I915_READ_CTL(engine); |
|---|
| 1235 | | - if (INTEL_GEN(dev_priv) > 2) |
|---|
| 1236 | | - ee->mode = I915_READ_MODE(engine); |
|---|
| 1160 | + ee->start = ENGINE_READ(engine, RING_START); |
|---|
| 1161 | + ee->head = ENGINE_READ(engine, RING_HEAD); |
|---|
| 1162 | + ee->tail = ENGINE_READ(engine, RING_TAIL); |
|---|
| 1163 | + ee->ctl = ENGINE_READ(engine, RING_CTL); |
|---|
| 1164 | + if (INTEL_GEN(i915) > 2) |
|---|
| 1165 | + ee->mode = ENGINE_READ(engine, RING_MI_MODE); |
|---|
| 1237 | 1166 | |
|---|
| 1238 | | - if (!HWS_NEEDS_PHYSICAL(dev_priv)) { |
|---|
| 1167 | + if (!HWS_NEEDS_PHYSICAL(i915)) { |
|---|
| 1239 | 1168 | i915_reg_t mmio; |
|---|
| 1240 | 1169 | |
|---|
| 1241 | | - if (IS_GEN7(dev_priv)) { |
|---|
| 1170 | + if (IS_GEN(i915, 7)) { |
|---|
| 1242 | 1171 | switch (engine->id) { |
|---|
| 1243 | 1172 | default: |
|---|
| 1244 | | - case RCS: |
|---|
| 1173 | + MISSING_CASE(engine->id); |
|---|
| 1174 | + fallthrough; |
|---|
| 1175 | + case RCS0: |
|---|
| 1245 | 1176 | mmio = RENDER_HWS_PGA_GEN7; |
|---|
| 1246 | 1177 | break; |
|---|
| 1247 | | - case BCS: |
|---|
| 1178 | + case BCS0: |
|---|
| 1248 | 1179 | mmio = BLT_HWS_PGA_GEN7; |
|---|
| 1249 | 1180 | break; |
|---|
| 1250 | | - case VCS: |
|---|
| 1181 | + case VCS0: |
|---|
| 1251 | 1182 | mmio = BSD_HWS_PGA_GEN7; |
|---|
| 1252 | 1183 | break; |
|---|
| 1253 | | - case VECS: |
|---|
| 1184 | + case VECS0: |
|---|
| 1254 | 1185 | mmio = VEBOX_HWS_PGA_GEN7; |
|---|
| 1255 | 1186 | break; |
|---|
| 1256 | 1187 | } |
|---|
| 1257 | | - } else if (IS_GEN6(engine->i915)) { |
|---|
| 1188 | + } else if (IS_GEN(engine->i915, 6)) { |
|---|
| 1258 | 1189 | mmio = RING_HWS_PGA_GEN6(engine->mmio_base); |
|---|
| 1259 | 1190 | } else { |
|---|
| 1260 | 1191 | /* XXX: gen8 returns to sanity */ |
|---|
| 1261 | 1192 | mmio = RING_HWS_PGA(engine->mmio_base); |
|---|
| 1262 | 1193 | } |
|---|
| 1263 | 1194 | |
|---|
| 1264 | | - ee->hws = I915_READ(mmio); |
|---|
| 1195 | + ee->hws = intel_uncore_read(engine->uncore, mmio); |
|---|
| 1265 | 1196 | } |
|---|
| 1266 | 1197 | |
|---|
| 1267 | | - ee->idle = intel_engine_is_idle(engine); |
|---|
| 1268 | | - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; |
|---|
| 1269 | | - ee->hangcheck_action = engine->hangcheck.action; |
|---|
| 1270 | | - ee->hangcheck_stalled = engine->hangcheck.stalled; |
|---|
| 1271 | | - ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, |
|---|
| 1272 | | - engine); |
|---|
| 1198 | + ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine); |
|---|
| 1273 | 1199 | |
|---|
| 1274 | | - if (USES_PPGTT(dev_priv)) { |
|---|
| 1200 | + if (HAS_PPGTT(i915)) { |
|---|
| 1275 | 1201 | int i; |
|---|
| 1276 | 1202 | |
|---|
| 1277 | | - ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); |
|---|
| 1203 | + ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7); |
|---|
| 1278 | 1204 | |
|---|
| 1279 | | - if (IS_GEN6(dev_priv)) |
|---|
| 1205 | + if (IS_GEN(i915, 6)) { |
|---|
| 1280 | 1206 | ee->vm_info.pp_dir_base = |
|---|
| 1281 | | - I915_READ(RING_PP_DIR_BASE_READ(engine)); |
|---|
| 1282 | | - else if (IS_GEN7(dev_priv)) |
|---|
| 1207 | + ENGINE_READ(engine, RING_PP_DIR_BASE_READ); |
|---|
| 1208 | + } else if (IS_GEN(i915, 7)) { |
|---|
| 1283 | 1209 | ee->vm_info.pp_dir_base = |
|---|
| 1284 | | - I915_READ(RING_PP_DIR_BASE(engine)); |
|---|
| 1285 | | - else if (INTEL_GEN(dev_priv) >= 8) |
|---|
| 1210 | + ENGINE_READ(engine, RING_PP_DIR_BASE); |
|---|
| 1211 | + } else if (INTEL_GEN(i915) >= 8) { |
|---|
| 1212 | + u32 base = engine->mmio_base; |
|---|
| 1213 | + |
|---|
| 1286 | 1214 | for (i = 0; i < 4; i++) { |
|---|
| 1287 | 1215 | ee->vm_info.pdp[i] = |
|---|
| 1288 | | - I915_READ(GEN8_RING_PDP_UDW(engine, i)); |
|---|
| 1216 | + intel_uncore_read(engine->uncore, |
|---|
| 1217 | + GEN8_RING_PDP_UDW(base, i)); |
|---|
| 1289 | 1218 | ee->vm_info.pdp[i] <<= 32; |
|---|
| 1290 | 1219 | ee->vm_info.pdp[i] |= |
|---|
| 1291 | | - I915_READ(GEN8_RING_PDP_LDW(engine, i)); |
|---|
| 1220 | + intel_uncore_read(engine->uncore, |
|---|
| 1221 | + GEN8_RING_PDP_LDW(base, i)); |
|---|
| 1292 | 1222 | } |
|---|
| 1223 | + } |
|---|
| 1293 | 1224 | } |
|---|
| 1294 | 1225 | } |
|---|
| 1295 | 1226 | |
|---|
| 1296 | | -static void record_request(struct i915_request *request, |
|---|
| 1297 | | - struct drm_i915_error_request *erq) |
|---|
| 1227 | +static void record_request(const struct i915_request *request, |
|---|
| 1228 | + struct i915_request_coredump *erq) |
|---|
| 1298 | 1229 | { |
|---|
| 1299 | | - struct i915_gem_context *ctx = request->gem_context; |
|---|
| 1300 | | - |
|---|
| 1301 | | - erq->context = ctx->hw_id; |
|---|
| 1230 | + erq->flags = request->fence.flags; |
|---|
| 1231 | + erq->context = request->fence.context; |
|---|
| 1232 | + erq->seqno = request->fence.seqno; |
|---|
| 1302 | 1233 | erq->sched_attr = request->sched.attr; |
|---|
| 1303 | | - erq->ban_score = atomic_read(&ctx->ban_score); |
|---|
| 1304 | | - erq->seqno = request->global_seqno; |
|---|
| 1305 | | - erq->jiffies = request->emitted_jiffies; |
|---|
| 1306 | | - erq->start = i915_ggtt_offset(request->ring->vma); |
|---|
| 1307 | 1234 | erq->head = request->head; |
|---|
| 1308 | 1235 | erq->tail = request->tail; |
|---|
| 1309 | 1236 | |
|---|
| 1237 | + erq->pid = 0; |
|---|
| 1310 | 1238 | rcu_read_lock(); |
|---|
| 1311 | | - erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0; |
|---|
| 1239 | + if (!intel_context_is_closed(request->context)) { |
|---|
| 1240 | + const struct i915_gem_context *ctx; |
|---|
| 1241 | + |
|---|
| 1242 | + ctx = rcu_dereference(request->context->gem_context); |
|---|
| 1243 | + if (ctx) |
|---|
| 1244 | + erq->pid = pid_nr(ctx->pid); |
|---|
| 1245 | + } |
|---|
| 1312 | 1246 | rcu_read_unlock(); |
|---|
| 1313 | 1247 | } |
|---|
| 1314 | 1248 | |
|---|
| 1315 | | -static void engine_record_requests(struct intel_engine_cs *engine, |
|---|
| 1316 | | - struct i915_request *first, |
|---|
| 1317 | | - struct drm_i915_error_engine *ee) |
|---|
| 1249 | +static void engine_record_execlists(struct intel_engine_coredump *ee) |
|---|
| 1318 | 1250 | { |
|---|
| 1319 | | - struct i915_request *request; |
|---|
| 1320 | | - int count; |
|---|
| 1251 | + const struct intel_engine_execlists * const el = &ee->engine->execlists; |
|---|
| 1252 | + struct i915_request * const *port = el->active; |
|---|
| 1253 | + unsigned int n = 0; |
|---|
| 1321 | 1254 | |
|---|
| 1322 | | - count = 0; |
|---|
| 1323 | | - request = first; |
|---|
| 1324 | | - list_for_each_entry_from(request, &engine->timeline.requests, link) |
|---|
| 1325 | | - count++; |
|---|
| 1326 | | - if (!count) |
|---|
| 1327 | | - return; |
|---|
| 1328 | | - |
|---|
| 1329 | | - ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); |
|---|
| 1330 | | - if (!ee->requests) |
|---|
| 1331 | | - return; |
|---|
| 1332 | | - |
|---|
| 1333 | | - ee->num_requests = count; |
|---|
| 1334 | | - |
|---|
| 1335 | | - count = 0; |
|---|
| 1336 | | - request = first; |
|---|
| 1337 | | - list_for_each_entry_from(request, &engine->timeline.requests, link) { |
|---|
| 1338 | | - if (count >= ee->num_requests) { |
|---|
| 1339 | | - /* |
|---|
| 1340 | | - * If the ring request list was changed in |
|---|
| 1341 | | - * between the point where the error request |
|---|
| 1342 | | - * list was created and dimensioned and this |
|---|
| 1343 | | - * point then just exit early to avoid crashes. |
|---|
| 1344 | | - * |
|---|
| 1345 | | - * We don't need to communicate that the |
|---|
| 1346 | | - * request list changed state during error |
|---|
| 1347 | | - * state capture and that the error state is |
|---|
| 1348 | | - * slightly incorrect as a consequence since we |
|---|
| 1349 | | - * are typically only interested in the request |
|---|
| 1350 | | - * list state at the point of error state |
|---|
| 1351 | | - * capture, not in any changes happening during |
|---|
| 1352 | | - * the capture. |
|---|
| 1353 | | - */ |
|---|
| 1354 | | - break; |
|---|
| 1355 | | - } |
|---|
| 1356 | | - |
|---|
| 1357 | | - record_request(request, &ee->requests[count++]); |
|---|
| 1358 | | - } |
|---|
| 1359 | | - ee->num_requests = count; |
|---|
| 1360 | | -} |
|---|
| 1361 | | - |
|---|
| 1362 | | -static void error_record_engine_execlists(struct intel_engine_cs *engine, |
|---|
| 1363 | | - struct drm_i915_error_engine *ee) |
|---|
| 1364 | | -{ |
|---|
| 1365 | | - const struct intel_engine_execlists * const execlists = &engine->execlists; |
|---|
| 1366 | | - unsigned int n; |
|---|
| 1367 | | - |
|---|
| 1368 | | - for (n = 0; n < execlists_num_ports(execlists); n++) { |
|---|
| 1369 | | - struct i915_request *rq = port_request(&execlists->port[n]); |
|---|
| 1370 | | - |
|---|
| 1371 | | - if (!rq) |
|---|
| 1372 | | - break; |
|---|
| 1373 | | - |
|---|
| 1374 | | - record_request(rq, &ee->execlist[n]); |
|---|
| 1375 | | - } |
|---|
| 1255 | + while (*port) |
|---|
| 1256 | + record_request(*port++, &ee->execlist[n++]); |
|---|
| 1376 | 1257 | |
|---|
| 1377 | 1258 | ee->num_ports = n; |
|---|
| 1378 | 1259 | } |
|---|
| 1379 | 1260 | |
|---|
| 1380 | | -static void record_context(struct drm_i915_error_context *e, |
|---|
| 1381 | | - struct i915_gem_context *ctx) |
|---|
| 1261 | +static bool record_context(struct i915_gem_context_coredump *e, |
|---|
| 1262 | + const struct i915_request *rq) |
|---|
| 1382 | 1263 | { |
|---|
| 1383 | | - if (ctx->pid) { |
|---|
| 1384 | | - struct task_struct *task; |
|---|
| 1264 | + struct i915_gem_context *ctx; |
|---|
| 1265 | + struct task_struct *task; |
|---|
| 1266 | + bool simulated; |
|---|
| 1385 | 1267 | |
|---|
| 1386 | | - rcu_read_lock(); |
|---|
| 1387 | | - task = pid_task(ctx->pid, PIDTYPE_PID); |
|---|
| 1388 | | - if (task) { |
|---|
| 1389 | | - strcpy(e->comm, task->comm); |
|---|
| 1390 | | - e->pid = task->pid; |
|---|
| 1391 | | - } |
|---|
| 1392 | | - rcu_read_unlock(); |
|---|
| 1268 | + rcu_read_lock(); |
|---|
| 1269 | + ctx = rcu_dereference(rq->context->gem_context); |
|---|
| 1270 | + if (ctx && !kref_get_unless_zero(&ctx->ref)) |
|---|
| 1271 | + ctx = NULL; |
|---|
| 1272 | + rcu_read_unlock(); |
|---|
| 1273 | + if (!ctx) |
|---|
| 1274 | + return true; |
|---|
| 1275 | + |
|---|
| 1276 | + rcu_read_lock(); |
|---|
| 1277 | + task = pid_task(ctx->pid, PIDTYPE_PID); |
|---|
| 1278 | + if (task) { |
|---|
| 1279 | + strcpy(e->comm, task->comm); |
|---|
| 1280 | + e->pid = task->pid; |
|---|
| 1393 | 1281 | } |
|---|
| 1282 | + rcu_read_unlock(); |
|---|
| 1394 | 1283 | |
|---|
| 1395 | | - e->handle = ctx->user_handle; |
|---|
| 1396 | | - e->hw_id = ctx->hw_id; |
|---|
| 1397 | 1284 | e->sched_attr = ctx->sched; |
|---|
| 1398 | | - e->ban_score = atomic_read(&ctx->ban_score); |
|---|
| 1399 | | - e->bannable = i915_gem_context_is_bannable(ctx); |
|---|
| 1400 | 1285 | e->guilty = atomic_read(&ctx->guilty_count); |
|---|
| 1401 | 1286 | e->active = atomic_read(&ctx->active_count); |
|---|
| 1287 | + |
|---|
| 1288 | + e->total_runtime = rq->context->runtime.total; |
|---|
| 1289 | + e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg); |
|---|
| 1290 | + |
|---|
| 1291 | + simulated = i915_gem_context_no_error_capture(ctx); |
|---|
| 1292 | + |
|---|
| 1293 | + i915_gem_context_put(ctx); |
|---|
| 1294 | + return simulated; |
|---|
| 1402 | 1295 | } |
|---|
| 1403 | 1296 | |
|---|
| 1404 | | -static void request_record_user_bo(struct i915_request *request, |
|---|
| 1405 | | - struct drm_i915_error_engine *ee) |
|---|
| 1297 | +struct intel_engine_capture_vma { |
|---|
| 1298 | + struct intel_engine_capture_vma *next; |
|---|
| 1299 | + struct i915_vma *vma; |
|---|
| 1300 | + char name[16]; |
|---|
| 1301 | +}; |
|---|
| 1302 | + |
|---|
| 1303 | +static struct intel_engine_capture_vma * |
|---|
| 1304 | +capture_vma(struct intel_engine_capture_vma *next, |
|---|
| 1305 | + struct i915_vma *vma, |
|---|
| 1306 | + const char *name, |
|---|
| 1307 | + gfp_t gfp) |
|---|
| 1308 | +{ |
|---|
| 1309 | + struct intel_engine_capture_vma *c; |
|---|
| 1310 | + |
|---|
| 1311 | + if (!vma) |
|---|
| 1312 | + return next; |
|---|
| 1313 | + |
|---|
| 1314 | + c = kmalloc(sizeof(*c), gfp); |
|---|
| 1315 | + if (!c) |
|---|
| 1316 | + return next; |
|---|
| 1317 | + |
|---|
| 1318 | + if (!i915_active_acquire_if_busy(&vma->active)) { |
|---|
| 1319 | + kfree(c); |
|---|
| 1320 | + return next; |
|---|
| 1321 | + } |
|---|
| 1322 | + |
|---|
| 1323 | + strcpy(c->name, name); |
|---|
| 1324 | + c->vma = vma; /* reference held while active */ |
|---|
| 1325 | + |
|---|
| 1326 | + c->next = next; |
|---|
| 1327 | + return c; |
|---|
| 1328 | +} |
|---|
| 1329 | + |
|---|
| 1330 | +static struct intel_engine_capture_vma * |
|---|
| 1331 | +capture_user(struct intel_engine_capture_vma *capture, |
|---|
| 1332 | + const struct i915_request *rq, |
|---|
| 1333 | + gfp_t gfp) |
|---|
| 1406 | 1334 | { |
|---|
| 1407 | 1335 | struct i915_capture_list *c; |
|---|
| 1408 | | - struct drm_i915_error_object **bo; |
|---|
| 1409 | | - long count; |
|---|
| 1410 | 1336 | |
|---|
| 1411 | | - count = 0; |
|---|
| 1412 | | - for (c = request->capture_list; c; c = c->next) |
|---|
| 1413 | | - count++; |
|---|
| 1337 | + for (c = rq->capture_list; c; c = c->next) |
|---|
| 1338 | + capture = capture_vma(capture, c->vma, "user", gfp); |
|---|
| 1414 | 1339 | |
|---|
| 1415 | | - bo = NULL; |
|---|
| 1416 | | - if (count) |
|---|
| 1417 | | - bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); |
|---|
| 1418 | | - if (!bo) |
|---|
| 1419 | | - return; |
|---|
| 1420 | | - |
|---|
| 1421 | | - count = 0; |
|---|
| 1422 | | - for (c = request->capture_list; c; c = c->next) { |
|---|
| 1423 | | - bo[count] = i915_error_object_create(request->i915, c->vma); |
|---|
| 1424 | | - if (!bo[count]) |
|---|
| 1425 | | - break; |
|---|
| 1426 | | - count++; |
|---|
| 1427 | | - } |
|---|
| 1428 | | - |
|---|
| 1429 | | - ee->user_bo = bo; |
|---|
| 1430 | | - ee->user_bo_count = count; |
|---|
| 1340 | + return capture; |
|---|
| 1431 | 1341 | } |
|---|
| 1432 | 1342 | |
|---|
| 1433 | | -static struct drm_i915_error_object * |
|---|
| 1434 | | -capture_object(struct drm_i915_private *dev_priv, |
|---|
| 1435 | | - struct drm_i915_gem_object *obj) |
|---|
| 1343 | +static void add_vma(struct intel_engine_coredump *ee, |
|---|
| 1344 | + struct i915_vma_coredump *vma) |
|---|
| 1436 | 1345 | { |
|---|
| 1437 | | - if (obj && i915_gem_object_has_pages(obj)) { |
|---|
| 1438 | | - struct i915_vma fake = { |
|---|
| 1439 | | - .node = { .start = U64_MAX, .size = obj->base.size }, |
|---|
| 1440 | | - .size = obj->base.size, |
|---|
| 1441 | | - .pages = obj->mm.pages, |
|---|
| 1442 | | - .obj = obj, |
|---|
| 1443 | | - }; |
|---|
| 1346 | + if (vma) { |
|---|
| 1347 | + vma->next = ee->vma; |
|---|
| 1348 | + ee->vma = vma; |
|---|
| 1349 | + } |
|---|
| 1350 | +} |
|---|
| 1444 | 1351 | |
|---|
| 1445 | | - return i915_error_object_create(dev_priv, &fake); |
|---|
| 1446 | | - } else { |
|---|
| 1352 | +struct intel_engine_coredump * |
|---|
| 1353 | +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) |
|---|
| 1354 | +{ |
|---|
| 1355 | + struct intel_engine_coredump *ee; |
|---|
| 1356 | + |
|---|
| 1357 | + ee = kzalloc(sizeof(*ee), gfp); |
|---|
| 1358 | + if (!ee) |
|---|
| 1359 | + return NULL; |
|---|
| 1360 | + |
|---|
| 1361 | + ee->engine = engine; |
|---|
| 1362 | + |
|---|
| 1363 | + engine_record_registers(ee); |
|---|
| 1364 | + engine_record_execlists(ee); |
|---|
| 1365 | + |
|---|
| 1366 | + return ee; |
|---|
| 1367 | +} |
|---|
| 1368 | + |
|---|
| 1369 | +struct intel_engine_capture_vma * |
|---|
| 1370 | +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, |
|---|
| 1371 | + struct i915_request *rq, |
|---|
| 1372 | + gfp_t gfp) |
|---|
| 1373 | +{ |
|---|
| 1374 | + struct intel_engine_capture_vma *vma = NULL; |
|---|
| 1375 | + |
|---|
| 1376 | + ee->simulated |= record_context(&ee->context, rq); |
|---|
| 1377 | + if (ee->simulated) |
|---|
| 1378 | + return NULL; |
|---|
| 1379 | + |
|---|
| 1380 | + /* |
|---|
| 1381 | + * We need to copy these to an anonymous buffer |
|---|
| 1382 | + * as the simplest method to avoid being overwritten |
|---|
| 1383 | + * by userspace. |
|---|
| 1384 | + */ |
|---|
| 1385 | + vma = capture_vma(vma, rq->batch, "batch", gfp); |
|---|
| 1386 | + vma = capture_user(vma, rq, gfp); |
|---|
| 1387 | + vma = capture_vma(vma, rq->ring->vma, "ring", gfp); |
|---|
| 1388 | + vma = capture_vma(vma, rq->context->state, "HW context", gfp); |
|---|
| 1389 | + |
|---|
| 1390 | + ee->rq_head = rq->head; |
|---|
| 1391 | + ee->rq_post = rq->postfix; |
|---|
| 1392 | + ee->rq_tail = rq->tail; |
|---|
| 1393 | + |
|---|
| 1394 | + return vma; |
|---|
| 1395 | +} |
|---|
| 1396 | + |
|---|
| 1397 | +void |
|---|
| 1398 | +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, |
|---|
| 1399 | + struct intel_engine_capture_vma *capture, |
|---|
| 1400 | + struct i915_vma_compress *compress) |
|---|
| 1401 | +{ |
|---|
| 1402 | + const struct intel_engine_cs *engine = ee->engine; |
|---|
| 1403 | + |
|---|
| 1404 | + while (capture) { |
|---|
| 1405 | + struct intel_engine_capture_vma *this = capture; |
|---|
| 1406 | + struct i915_vma *vma = this->vma; |
|---|
| 1407 | + |
|---|
| 1408 | + add_vma(ee, |
|---|
| 1409 | + i915_vma_coredump_create(engine->gt, |
|---|
| 1410 | + vma, this->name, |
|---|
| 1411 | + compress)); |
|---|
| 1412 | + |
|---|
| 1413 | + i915_active_release(&vma->active); |
|---|
| 1414 | + |
|---|
| 1415 | + capture = this->next; |
|---|
| 1416 | + kfree(this); |
|---|
| 1417 | + } |
|---|
| 1418 | + |
|---|
| 1419 | + add_vma(ee, |
|---|
| 1420 | + i915_vma_coredump_create(engine->gt, |
|---|
| 1421 | + engine->status_page.vma, |
|---|
| 1422 | + "HW Status", |
|---|
| 1423 | + compress)); |
|---|
| 1424 | + |
|---|
| 1425 | + add_vma(ee, |
|---|
| 1426 | + i915_vma_coredump_create(engine->gt, |
|---|
| 1427 | + engine->wa_ctx.vma, |
|---|
| 1428 | + "WA context", |
|---|
| 1429 | + compress)); |
|---|
| 1430 | +} |
|---|
| 1431 | + |
|---|
| 1432 | +static struct intel_engine_coredump * |
|---|
| 1433 | +capture_engine(struct intel_engine_cs *engine, |
|---|
| 1434 | + struct i915_vma_compress *compress) |
|---|
| 1435 | +{ |
|---|
| 1436 | + struct intel_engine_capture_vma *capture = NULL; |
|---|
| 1437 | + struct intel_engine_coredump *ee; |
|---|
| 1438 | + struct i915_request *rq; |
|---|
| 1439 | + unsigned long flags; |
|---|
| 1440 | + |
|---|
| 1441 | + ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); |
|---|
| 1442 | + if (!ee) |
|---|
| 1443 | + return NULL; |
|---|
| 1444 | + |
|---|
| 1445 | + spin_lock_irqsave(&engine->active.lock, flags); |
|---|
| 1446 | + rq = intel_engine_find_active_request(engine); |
|---|
| 1447 | + if (rq) |
|---|
| 1448 | + capture = intel_engine_coredump_add_request(ee, rq, |
|---|
| 1449 | + ATOMIC_MAYFAIL); |
|---|
| 1450 | + spin_unlock_irqrestore(&engine->active.lock, flags); |
|---|
| 1451 | + if (!capture) { |
|---|
| 1452 | + kfree(ee); |
|---|
| 1447 | 1453 | return NULL; |
|---|
| 1448 | 1454 | } |
|---|
| 1455 | + |
|---|
| 1456 | + intel_engine_coredump_add_vma(ee, capture, compress); |
|---|
| 1457 | + |
|---|
| 1458 | + return ee; |
|---|
| 1449 | 1459 | } |
|---|
| 1450 | 1460 | |
|---|
| 1451 | | -static void gem_record_rings(struct i915_gpu_state *error) |
|---|
| 1461 | +static void |
|---|
| 1462 | +gt_record_engines(struct intel_gt_coredump *gt, |
|---|
| 1463 | + struct i915_vma_compress *compress) |
|---|
| 1452 | 1464 | { |
|---|
| 1453 | | - struct drm_i915_private *i915 = error->i915; |
|---|
| 1454 | | - struct i915_ggtt *ggtt = &i915->ggtt; |
|---|
| 1455 | | - int i; |
|---|
| 1465 | + struct intel_engine_cs *engine; |
|---|
| 1466 | + enum intel_engine_id id; |
|---|
| 1456 | 1467 | |
|---|
| 1457 | | - for (i = 0; i < I915_NUM_ENGINES; i++) { |
|---|
| 1458 | | - struct intel_engine_cs *engine = i915->engine[i]; |
|---|
| 1459 | | - struct drm_i915_error_engine *ee = &error->engine[i]; |
|---|
| 1460 | | - struct i915_request *request; |
|---|
| 1468 | + for_each_engine(engine, gt->_gt, id) { |
|---|
| 1469 | + struct intel_engine_coredump *ee; |
|---|
| 1461 | 1470 | |
|---|
| 1462 | | - ee->engine_id = -1; |
|---|
| 1471 | + /* Refill our page pool before entering atomic section */ |
|---|
| 1472 | + pool_refill(&compress->pool, ALLOW_FAIL); |
|---|
| 1463 | 1473 | |
|---|
| 1464 | | - if (!engine) |
|---|
| 1474 | + ee = capture_engine(engine, compress); |
|---|
| 1475 | + if (!ee) |
|---|
| 1465 | 1476 | continue; |
|---|
| 1466 | 1477 | |
|---|
| 1467 | | - ee->engine_id = i; |
|---|
| 1468 | | - |
|---|
| 1469 | | - error_record_engine_registers(error, engine, ee); |
|---|
| 1470 | | - error_record_engine_waiters(engine, ee); |
|---|
| 1471 | | - error_record_engine_execlists(engine, ee); |
|---|
| 1472 | | - |
|---|
| 1473 | | - request = i915_gem_find_active_request(engine); |
|---|
| 1474 | | - if (request) { |
|---|
| 1475 | | - struct i915_gem_context *ctx = request->gem_context; |
|---|
| 1476 | | - struct intel_ring *ring; |
|---|
| 1477 | | - |
|---|
| 1478 | | - ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm; |
|---|
| 1479 | | - |
|---|
| 1480 | | - record_context(&ee->context, ctx); |
|---|
| 1481 | | - |
|---|
| 1482 | | - /* We need to copy these to an anonymous buffer |
|---|
| 1483 | | - * as the simplest method to avoid being overwritten |
|---|
| 1484 | | - * by userspace. |
|---|
| 1485 | | - */ |
|---|
| 1486 | | - ee->batchbuffer = |
|---|
| 1487 | | - i915_error_object_create(i915, request->batch); |
|---|
| 1488 | | - |
|---|
| 1489 | | - if (HAS_BROKEN_CS_TLB(i915)) |
|---|
| 1490 | | - ee->wa_batchbuffer = |
|---|
| 1491 | | - i915_error_object_create(i915, |
|---|
| 1492 | | - engine->scratch); |
|---|
| 1493 | | - request_record_user_bo(request, ee); |
|---|
| 1494 | | - |
|---|
| 1495 | | - ee->ctx = |
|---|
| 1496 | | - i915_error_object_create(i915, |
|---|
| 1497 | | - request->hw_context->state); |
|---|
| 1498 | | - |
|---|
| 1499 | | - error->simulated |= |
|---|
| 1500 | | - i915_gem_context_no_error_capture(ctx); |
|---|
| 1501 | | - |
|---|
| 1502 | | - ee->rq_head = request->head; |
|---|
| 1503 | | - ee->rq_post = request->postfix; |
|---|
| 1504 | | - ee->rq_tail = request->tail; |
|---|
| 1505 | | - |
|---|
| 1506 | | - ring = request->ring; |
|---|
| 1507 | | - ee->cpu_ring_head = ring->head; |
|---|
| 1508 | | - ee->cpu_ring_tail = ring->tail; |
|---|
| 1509 | | - ee->ringbuffer = |
|---|
| 1510 | | - i915_error_object_create(i915, ring->vma); |
|---|
| 1511 | | - |
|---|
| 1512 | | - engine_record_requests(engine, request, ee); |
|---|
| 1478 | + gt->simulated |= ee->simulated; |
|---|
| 1479 | + if (ee->simulated) { |
|---|
| 1480 | + kfree(ee); |
|---|
| 1481 | + continue; |
|---|
| 1513 | 1482 | } |
|---|
| 1514 | 1483 | |
|---|
| 1515 | | - ee->hws_page = |
|---|
| 1516 | | - i915_error_object_create(i915, |
|---|
| 1517 | | - engine->status_page.vma); |
|---|
| 1518 | | - |
|---|
| 1519 | | - ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma); |
|---|
| 1520 | | - |
|---|
| 1521 | | - ee->default_state = capture_object(i915, engine->default_state); |
|---|
| 1484 | + ee->next = gt->engine; |
|---|
| 1485 | + gt->engine = ee; |
|---|
| 1522 | 1486 | } |
|---|
| 1523 | 1487 | } |
|---|
| 1524 | 1488 | |
|---|
| 1525 | | -static void gem_capture_vm(struct i915_gpu_state *error, |
|---|
| 1526 | | - struct i915_address_space *vm, |
|---|
| 1527 | | - int idx) |
|---|
| 1489 | +static struct intel_uc_coredump * |
|---|
| 1490 | +gt_record_uc(struct intel_gt_coredump *gt, |
|---|
| 1491 | + struct i915_vma_compress *compress) |
|---|
| 1528 | 1492 | { |
|---|
| 1529 | | - struct drm_i915_error_buffer *active_bo; |
|---|
| 1530 | | - struct i915_vma *vma; |
|---|
| 1531 | | - int count; |
|---|
| 1493 | + const struct intel_uc *uc = >->_gt->uc; |
|---|
| 1494 | + struct intel_uc_coredump *error_uc; |
|---|
| 1532 | 1495 | |
|---|
| 1533 | | - count = 0; |
|---|
| 1534 | | - list_for_each_entry(vma, &vm->active_list, vm_link) |
|---|
| 1535 | | - count++; |
|---|
| 1496 | + error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL); |
|---|
| 1497 | + if (!error_uc) |
|---|
| 1498 | + return NULL; |
|---|
| 1536 | 1499 | |
|---|
| 1537 | | - active_bo = NULL; |
|---|
| 1538 | | - if (count) |
|---|
| 1539 | | - active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); |
|---|
| 1540 | | - if (active_bo) |
|---|
| 1541 | | - count = capture_error_bo(active_bo, count, &vm->active_list, false); |
|---|
| 1542 | | - else |
|---|
| 1543 | | - count = 0; |
|---|
| 1544 | | - |
|---|
| 1545 | | - error->active_vm[idx] = vm; |
|---|
| 1546 | | - error->active_bo[idx] = active_bo; |
|---|
| 1547 | | - error->active_bo_count[idx] = count; |
|---|
| 1548 | | -} |
|---|
| 1549 | | - |
|---|
| 1550 | | -static void capture_active_buffers(struct i915_gpu_state *error) |
|---|
| 1551 | | -{ |
|---|
| 1552 | | - int cnt = 0, i, j; |
|---|
| 1553 | | - |
|---|
| 1554 | | - BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo)); |
|---|
| 1555 | | - BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm)); |
|---|
| 1556 | | - BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count)); |
|---|
| 1557 | | - |
|---|
| 1558 | | - /* Scan each engine looking for unique active contexts/vm */ |
|---|
| 1559 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
|---|
| 1560 | | - struct drm_i915_error_engine *ee = &error->engine[i]; |
|---|
| 1561 | | - bool found; |
|---|
| 1562 | | - |
|---|
| 1563 | | - if (!ee->vm) |
|---|
| 1564 | | - continue; |
|---|
| 1565 | | - |
|---|
| 1566 | | - found = false; |
|---|
| 1567 | | - for (j = 0; j < i && !found; j++) |
|---|
| 1568 | | - found = error->engine[j].vm == ee->vm; |
|---|
| 1569 | | - if (!found) |
|---|
| 1570 | | - gem_capture_vm(error, ee->vm, cnt++); |
|---|
| 1571 | | - } |
|---|
| 1572 | | -} |
|---|
| 1573 | | - |
|---|
| 1574 | | -static void capture_pinned_buffers(struct i915_gpu_state *error) |
|---|
| 1575 | | -{ |
|---|
| 1576 | | - struct i915_address_space *vm = &error->i915->ggtt.vm; |
|---|
| 1577 | | - struct drm_i915_error_buffer *bo; |
|---|
| 1578 | | - struct i915_vma *vma; |
|---|
| 1579 | | - int count_inactive, count_active; |
|---|
| 1580 | | - |
|---|
| 1581 | | - count_inactive = 0; |
|---|
| 1582 | | - list_for_each_entry(vma, &vm->inactive_list, vm_link) |
|---|
| 1583 | | - count_inactive++; |
|---|
| 1584 | | - |
|---|
| 1585 | | - count_active = 0; |
|---|
| 1586 | | - list_for_each_entry(vma, &vm->active_list, vm_link) |
|---|
| 1587 | | - count_active++; |
|---|
| 1588 | | - |
|---|
| 1589 | | - bo = NULL; |
|---|
| 1590 | | - if (count_inactive + count_active) |
|---|
| 1591 | | - bo = kcalloc(count_inactive + count_active, |
|---|
| 1592 | | - sizeof(*bo), GFP_ATOMIC); |
|---|
| 1593 | | - if (!bo) |
|---|
| 1594 | | - return; |
|---|
| 1595 | | - |
|---|
| 1596 | | - count_inactive = capture_error_bo(bo, count_inactive, |
|---|
| 1597 | | - &vm->active_list, true); |
|---|
| 1598 | | - count_active = capture_error_bo(bo + count_inactive, count_active, |
|---|
| 1599 | | - &vm->inactive_list, true); |
|---|
| 1600 | | - error->pinned_bo_count = count_inactive + count_active; |
|---|
| 1601 | | - error->pinned_bo = bo; |
|---|
| 1602 | | -} |
|---|
| 1603 | | - |
|---|
| 1604 | | -static void capture_uc_state(struct i915_gpu_state *error) |
|---|
| 1605 | | -{ |
|---|
| 1606 | | - struct drm_i915_private *i915 = error->i915; |
|---|
| 1607 | | - struct i915_error_uc *error_uc = &error->uc; |
|---|
| 1608 | | - |
|---|
| 1609 | | - /* Capturing uC state won't be useful if there is no GuC */ |
|---|
| 1610 | | - if (!error->device_info.has_guc) |
|---|
| 1611 | | - return; |
|---|
| 1612 | | - |
|---|
| 1613 | | - error_uc->guc_fw = i915->guc.fw; |
|---|
| 1614 | | - error_uc->huc_fw = i915->huc.fw; |
|---|
| 1500 | + memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw)); |
|---|
| 1501 | + memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw)); |
|---|
| 1615 | 1502 | |
|---|
| 1616 | 1503 | /* Non-default firmware paths will be specified by the modparam. |
|---|
| 1617 | 1504 | * As modparams are generally accesible from the userspace make |
|---|
| 1618 | 1505 | * explicit copies of the firmware paths. |
|---|
| 1619 | 1506 | */ |
|---|
| 1620 | | - error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); |
|---|
| 1621 | | - error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); |
|---|
| 1622 | | - error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma); |
|---|
| 1507 | + error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); |
|---|
| 1508 | + error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); |
|---|
| 1509 | + error_uc->guc_log = |
|---|
| 1510 | + i915_vma_coredump_create(gt->_gt, |
|---|
| 1511 | + uc->guc.log.vma, "GuC log buffer", |
|---|
| 1512 | + compress); |
|---|
| 1513 | + |
|---|
| 1514 | + return error_uc; |
|---|
| 1515 | +} |
|---|
| 1516 | + |
|---|
| 1517 | +static void gt_capture_prepare(struct intel_gt_coredump *gt) |
|---|
| 1518 | +{ |
|---|
| 1519 | + struct i915_ggtt *ggtt = gt->_gt->ggtt; |
|---|
| 1520 | + |
|---|
| 1521 | + mutex_lock(&ggtt->error_mutex); |
|---|
| 1522 | +} |
|---|
| 1523 | + |
|---|
| 1524 | +static void gt_capture_finish(struct intel_gt_coredump *gt) |
|---|
| 1525 | +{ |
|---|
| 1526 | + struct i915_ggtt *ggtt = gt->_gt->ggtt; |
|---|
| 1527 | + |
|---|
| 1528 | + if (drm_mm_node_allocated(&ggtt->error_capture)) |
|---|
| 1529 | + ggtt->vm.clear_range(&ggtt->vm, |
|---|
| 1530 | + ggtt->error_capture.start, |
|---|
| 1531 | + PAGE_SIZE); |
|---|
| 1532 | + |
|---|
| 1533 | + mutex_unlock(&ggtt->error_mutex); |
|---|
| 1623 | 1534 | } |
|---|
| 1624 | 1535 | |
|---|
| 1625 | 1536 | /* Capture all registers which don't fit into another category. */ |
|---|
| 1626 | | -static void capture_reg_state(struct i915_gpu_state *error) |
|---|
| 1537 | +static void gt_record_regs(struct intel_gt_coredump *gt) |
|---|
| 1627 | 1538 | { |
|---|
| 1628 | | - struct drm_i915_private *dev_priv = error->i915; |
|---|
| 1539 | + struct intel_uncore *uncore = gt->_gt->uncore; |
|---|
| 1540 | + struct drm_i915_private *i915 = uncore->i915; |
|---|
| 1629 | 1541 | int i; |
|---|
| 1630 | 1542 | |
|---|
| 1631 | | - /* General organization |
|---|
| 1543 | + /* |
|---|
| 1544 | + * General organization |
|---|
| 1632 | 1545 | * 1. Registers specific to a single generation |
|---|
| 1633 | 1546 | * 2. Registers which belong to multiple generations |
|---|
| 1634 | 1547 | * 3. Feature specific registers. |
|---|
| .. | .. |
|---|
| 1637 | 1550 | */ |
|---|
| 1638 | 1551 | |
|---|
| 1639 | 1552 | /* 1: Registers specific to a single generation */ |
|---|
| 1640 | | - if (IS_VALLEYVIEW(dev_priv)) { |
|---|
| 1641 | | - error->gtier[0] = I915_READ(GTIER); |
|---|
| 1642 | | - error->ier = I915_READ(VLV_IER); |
|---|
| 1643 | | - error->forcewake = I915_READ_FW(FORCEWAKE_VLV); |
|---|
| 1553 | + if (IS_VALLEYVIEW(i915)) { |
|---|
| 1554 | + gt->gtier[0] = intel_uncore_read(uncore, GTIER); |
|---|
| 1555 | + gt->ier = intel_uncore_read(uncore, VLV_IER); |
|---|
| 1556 | + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); |
|---|
| 1644 | 1557 | } |
|---|
| 1645 | 1558 | |
|---|
| 1646 | | - if (IS_GEN7(dev_priv)) |
|---|
| 1647 | | - error->err_int = I915_READ(GEN7_ERR_INT); |
|---|
| 1559 | + if (IS_GEN(i915, 7)) |
|---|
| 1560 | + gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); |
|---|
| 1648 | 1561 | |
|---|
| 1649 | | - if (INTEL_GEN(dev_priv) >= 8) { |
|---|
| 1650 | | - error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0); |
|---|
| 1651 | | - error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1); |
|---|
| 1562 | + if (INTEL_GEN(i915) >= 12) { |
|---|
| 1563 | + gt->fault_data0 = intel_uncore_read(uncore, |
|---|
| 1564 | + GEN12_FAULT_TLB_DATA0); |
|---|
| 1565 | + gt->fault_data1 = intel_uncore_read(uncore, |
|---|
| 1566 | + GEN12_FAULT_TLB_DATA1); |
|---|
| 1567 | + } else if (INTEL_GEN(i915) >= 8) { |
|---|
| 1568 | + gt->fault_data0 = intel_uncore_read(uncore, |
|---|
| 1569 | + GEN8_FAULT_TLB_DATA0); |
|---|
| 1570 | + gt->fault_data1 = intel_uncore_read(uncore, |
|---|
| 1571 | + GEN8_FAULT_TLB_DATA1); |
|---|
| 1652 | 1572 | } |
|---|
| 1653 | 1573 | |
|---|
| 1654 | | - if (IS_GEN6(dev_priv)) { |
|---|
| 1655 | | - error->forcewake = I915_READ_FW(FORCEWAKE); |
|---|
| 1656 | | - error->gab_ctl = I915_READ(GAB_CTL); |
|---|
| 1657 | | - error->gfx_mode = I915_READ(GFX_MODE); |
|---|
| 1574 | + if (IS_GEN(i915, 6)) { |
|---|
| 1575 | + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE); |
|---|
| 1576 | + gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL); |
|---|
| 1577 | + gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE); |
|---|
| 1658 | 1578 | } |
|---|
| 1659 | 1579 | |
|---|
| 1660 | 1580 | /* 2: Registers which belong to multiple generations */ |
|---|
| 1661 | | - if (INTEL_GEN(dev_priv) >= 7) |
|---|
| 1662 | | - error->forcewake = I915_READ_FW(FORCEWAKE_MT); |
|---|
| 1581 | + if (INTEL_GEN(i915) >= 7) |
|---|
| 1582 | + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); |
|---|
| 1663 | 1583 | |
|---|
| 1664 | | - if (INTEL_GEN(dev_priv) >= 6) { |
|---|
| 1665 | | - error->derrmr = I915_READ(DERRMR); |
|---|
| 1666 | | - error->error = I915_READ(ERROR_GEN6); |
|---|
| 1667 | | - error->done_reg = I915_READ(DONE_REG); |
|---|
| 1584 | + if (INTEL_GEN(i915) >= 6) { |
|---|
| 1585 | + gt->derrmr = intel_uncore_read(uncore, DERRMR); |
|---|
| 1586 | + if (INTEL_GEN(i915) < 12) { |
|---|
| 1587 | + gt->error = intel_uncore_read(uncore, ERROR_GEN6); |
|---|
| 1588 | + gt->done_reg = intel_uncore_read(uncore, DONE_REG); |
|---|
| 1589 | + } |
|---|
| 1668 | 1590 | } |
|---|
| 1669 | 1591 | |
|---|
| 1670 | | - if (INTEL_GEN(dev_priv) >= 5) |
|---|
| 1671 | | - error->ccid = I915_READ(CCID); |
|---|
| 1672 | | - |
|---|
| 1673 | 1592 | /* 3: Feature specific registers */ |
|---|
| 1674 | | - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { |
|---|
| 1675 | | - error->gam_ecochk = I915_READ(GAM_ECOCHK); |
|---|
| 1676 | | - error->gac_eco = I915_READ(GAC_ECO_BITS); |
|---|
| 1593 | + if (IS_GEN_RANGE(i915, 6, 7)) { |
|---|
| 1594 | + gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK); |
|---|
| 1595 | + gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); |
|---|
| 1596 | + } |
|---|
| 1597 | + |
|---|
| 1598 | + if (IS_GEN_RANGE(i915, 8, 11)) |
|---|
| 1599 | + gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); |
|---|
| 1600 | + |
|---|
| 1601 | + if (IS_GEN(i915, 12)) |
|---|
| 1602 | + gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); |
|---|
| 1603 | + |
|---|
| 1604 | + if (INTEL_GEN(i915) >= 12) { |
|---|
| 1605 | + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { |
|---|
| 1606 | + /* |
|---|
| 1607 | + * SFC_DONE resides in the VD forcewake domain, so it |
|---|
| 1608 | + * only exists if the corresponding VCS engine is |
|---|
| 1609 | + * present. |
|---|
| 1610 | + */ |
|---|
| 1611 | + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) |
|---|
| 1612 | + continue; |
|---|
| 1613 | + |
|---|
| 1614 | + gt->sfc_done[i] = |
|---|
| 1615 | + intel_uncore_read(uncore, GEN12_SFC_DONE(i)); |
|---|
| 1616 | + } |
|---|
| 1617 | + |
|---|
| 1618 | + gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); |
|---|
| 1677 | 1619 | } |
|---|
| 1678 | 1620 | |
|---|
| 1679 | 1621 | /* 4: Everything else */ |
|---|
| 1680 | | - if (INTEL_GEN(dev_priv) >= 11) { |
|---|
| 1681 | | - error->ier = I915_READ(GEN8_DE_MISC_IER); |
|---|
| 1682 | | - error->gtier[0] = I915_READ(GEN11_RENDER_COPY_INTR_ENABLE); |
|---|
| 1683 | | - error->gtier[1] = I915_READ(GEN11_VCS_VECS_INTR_ENABLE); |
|---|
| 1684 | | - error->gtier[2] = I915_READ(GEN11_GUC_SG_INTR_ENABLE); |
|---|
| 1685 | | - error->gtier[3] = I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE); |
|---|
| 1686 | | - error->gtier[4] = I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE); |
|---|
| 1687 | | - error->gtier[5] = I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE); |
|---|
| 1688 | | - error->ngtier = 6; |
|---|
| 1689 | | - } else if (INTEL_GEN(dev_priv) >= 8) { |
|---|
| 1690 | | - error->ier = I915_READ(GEN8_DE_MISC_IER); |
|---|
| 1622 | + if (INTEL_GEN(i915) >= 11) { |
|---|
| 1623 | + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); |
|---|
| 1624 | + gt->gtier[0] = |
|---|
| 1625 | + intel_uncore_read(uncore, |
|---|
| 1626 | + GEN11_RENDER_COPY_INTR_ENABLE); |
|---|
| 1627 | + gt->gtier[1] = |
|---|
| 1628 | + intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE); |
|---|
| 1629 | + gt->gtier[2] = |
|---|
| 1630 | + intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE); |
|---|
| 1631 | + gt->gtier[3] = |
|---|
| 1632 | + intel_uncore_read(uncore, |
|---|
| 1633 | + GEN11_GPM_WGBOXPERF_INTR_ENABLE); |
|---|
| 1634 | + gt->gtier[4] = |
|---|
| 1635 | + intel_uncore_read(uncore, |
|---|
| 1636 | + GEN11_CRYPTO_RSVD_INTR_ENABLE); |
|---|
| 1637 | + gt->gtier[5] = |
|---|
| 1638 | + intel_uncore_read(uncore, |
|---|
| 1639 | + GEN11_GUNIT_CSME_INTR_ENABLE); |
|---|
| 1640 | + gt->ngtier = 6; |
|---|
| 1641 | + } else if (INTEL_GEN(i915) >= 8) { |
|---|
| 1642 | + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); |
|---|
| 1691 | 1643 | for (i = 0; i < 4; i++) |
|---|
| 1692 | | - error->gtier[i] = I915_READ(GEN8_GT_IER(i)); |
|---|
| 1693 | | - error->ngtier = 4; |
|---|
| 1694 | | - } else if (HAS_PCH_SPLIT(dev_priv)) { |
|---|
| 1695 | | - error->ier = I915_READ(DEIER); |
|---|
| 1696 | | - error->gtier[0] = I915_READ(GTIER); |
|---|
| 1697 | | - error->ngtier = 1; |
|---|
| 1698 | | - } else if (IS_GEN2(dev_priv)) { |
|---|
| 1699 | | - error->ier = I915_READ16(IER); |
|---|
| 1700 | | - } else if (!IS_VALLEYVIEW(dev_priv)) { |
|---|
| 1701 | | - error->ier = I915_READ(IER); |
|---|
| 1644 | + gt->gtier[i] = |
|---|
| 1645 | + intel_uncore_read(uncore, GEN8_GT_IER(i)); |
|---|
| 1646 | + gt->ngtier = 4; |
|---|
| 1647 | + } else if (HAS_PCH_SPLIT(i915)) { |
|---|
| 1648 | + gt->ier = intel_uncore_read(uncore, DEIER); |
|---|
| 1649 | + gt->gtier[0] = intel_uncore_read(uncore, GTIER); |
|---|
| 1650 | + gt->ngtier = 1; |
|---|
| 1651 | + } else if (IS_GEN(i915, 2)) { |
|---|
| 1652 | + gt->ier = intel_uncore_read16(uncore, GEN2_IER); |
|---|
| 1653 | + } else if (!IS_VALLEYVIEW(i915)) { |
|---|
| 1654 | + gt->ier = intel_uncore_read(uncore, GEN2_IER); |
|---|
| 1702 | 1655 | } |
|---|
| 1703 | | - error->eir = I915_READ(EIR); |
|---|
| 1704 | | - error->pgtbl_er = I915_READ(PGTBL_ER); |
|---|
| 1656 | + gt->eir = intel_uncore_read(uncore, EIR); |
|---|
| 1657 | + gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); |
|---|
| 1705 | 1658 | } |
|---|
| 1706 | 1659 | |
|---|
| 1707 | | -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, |
|---|
| 1708 | | - struct i915_gpu_state *error, |
|---|
| 1709 | | - u32 engine_mask, |
|---|
| 1710 | | - const char *error_msg) |
|---|
| 1660 | +static void gt_record_info(struct intel_gt_coredump *gt) |
|---|
| 1711 | 1661 | { |
|---|
| 1712 | | - u32 ecode; |
|---|
| 1713 | | - int engine_id = -1, len; |
|---|
| 1662 | + memcpy(>->info, >->_gt->info, sizeof(struct intel_gt_info)); |
|---|
| 1663 | +} |
|---|
| 1714 | 1664 | |
|---|
| 1715 | | - ecode = i915_error_generate_code(dev_priv, error, &engine_id); |
|---|
| 1665 | +/* |
|---|
| 1666 | + * Generate a semi-unique error code. The code is not meant to have meaning, The |
|---|
| 1667 | + * code's only purpose is to try to prevent false duplicated bug reports by |
|---|
| 1668 | + * grossly estimating a GPU error state. |
|---|
| 1669 | + * |
|---|
| 1670 | + * TODO Ideally, hashing the batchbuffer would be a very nice way to determine |
|---|
| 1671 | + * the hang if we could strip the GTT offset information from it. |
|---|
| 1672 | + * |
|---|
| 1673 | + * It's only a small step better than a random number in its current form. |
|---|
| 1674 | + */ |
|---|
| 1675 | +static u32 generate_ecode(const struct intel_engine_coredump *ee) |
|---|
| 1676 | +{ |
|---|
| 1677 | + /* |
|---|
| 1678 | + * IPEHR would be an ideal way to detect errors, as it's the gross |
|---|
| 1679 | + * measure of "the command that hung." However, has some very common |
|---|
| 1680 | + * synchronization commands which almost always appear in the case |
|---|
| 1681 | + * strictly a client bug. Use instdone to differentiate those some. |
|---|
| 1682 | + */ |
|---|
| 1683 | + return ee ? ee->ipehr ^ ee->instdone.instdone : 0; |
|---|
| 1684 | +} |
|---|
| 1685 | + |
|---|
| 1686 | +static const char *error_msg(struct i915_gpu_coredump *error) |
|---|
| 1687 | +{ |
|---|
| 1688 | + struct intel_engine_coredump *first = NULL; |
|---|
| 1689 | + struct intel_gt_coredump *gt; |
|---|
| 1690 | + intel_engine_mask_t engines; |
|---|
| 1691 | + int len; |
|---|
| 1692 | + |
|---|
| 1693 | + engines = 0; |
|---|
| 1694 | + for (gt = error->gt; gt; gt = gt->next) { |
|---|
| 1695 | + struct intel_engine_coredump *cs; |
|---|
| 1696 | + |
|---|
| 1697 | + if (gt->engine && !first) |
|---|
| 1698 | + first = gt->engine; |
|---|
| 1699 | + |
|---|
| 1700 | + for (cs = gt->engine; cs; cs = cs->next) |
|---|
| 1701 | + engines |= cs->engine->mask; |
|---|
| 1702 | + } |
|---|
| 1716 | 1703 | |
|---|
| 1717 | 1704 | len = scnprintf(error->error_msg, sizeof(error->error_msg), |
|---|
| 1718 | | - "GPU HANG: ecode %d:%d:0x%08x", |
|---|
| 1719 | | - INTEL_GEN(dev_priv), engine_id, ecode); |
|---|
| 1720 | | - |
|---|
| 1721 | | - if (engine_id != -1 && error->engine[engine_id].context.pid) |
|---|
| 1705 | + "GPU HANG: ecode %d:%x:%08x", |
|---|
| 1706 | + INTEL_GEN(error->i915), engines, |
|---|
| 1707 | + generate_ecode(first)); |
|---|
| 1708 | + if (first && first->context.pid) { |
|---|
| 1709 | + /* Just show the first executing process, more is confusing */ |
|---|
| 1722 | 1710 | len += scnprintf(error->error_msg + len, |
|---|
| 1723 | 1711 | sizeof(error->error_msg) - len, |
|---|
| 1724 | 1712 | ", in %s [%d]", |
|---|
| 1725 | | - error->engine[engine_id].context.comm, |
|---|
| 1726 | | - error->engine[engine_id].context.pid); |
|---|
| 1713 | + first->context.comm, first->context.pid); |
|---|
| 1714 | + } |
|---|
| 1727 | 1715 | |
|---|
| 1728 | | - scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, |
|---|
| 1729 | | - ", reason: %s, action: %s", |
|---|
| 1730 | | - error_msg, |
|---|
| 1731 | | - engine_mask ? "reset" : "continue"); |
|---|
| 1716 | + return error->error_msg; |
|---|
| 1732 | 1717 | } |
|---|
| 1733 | 1718 | |
|---|
| 1734 | | -static void capture_gen_state(struct i915_gpu_state *error) |
|---|
| 1719 | +static void capture_gen(struct i915_gpu_coredump *error) |
|---|
| 1735 | 1720 | { |
|---|
| 1736 | 1721 | struct drm_i915_private *i915 = error->i915; |
|---|
| 1737 | 1722 | |
|---|
| 1738 | | - error->awake = i915->gt.awake; |
|---|
| 1739 | 1723 | error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); |
|---|
| 1740 | 1724 | error->suspended = i915->runtime_pm.suspended; |
|---|
| 1741 | 1725 | |
|---|
| .. | .. |
|---|
| 1746 | 1730 | error->reset_count = i915_reset_count(&i915->gpu_error); |
|---|
| 1747 | 1731 | error->suspend_count = i915->suspend_count; |
|---|
| 1748 | 1732 | |
|---|
| 1733 | + i915_params_copy(&error->params, &i915->params); |
|---|
| 1749 | 1734 | memcpy(&error->device_info, |
|---|
| 1750 | 1735 | INTEL_INFO(i915), |
|---|
| 1751 | 1736 | sizeof(error->device_info)); |
|---|
| 1737 | + memcpy(&error->runtime_info, |
|---|
| 1738 | + RUNTIME_INFO(i915), |
|---|
| 1739 | + sizeof(error->runtime_info)); |
|---|
| 1752 | 1740 | error->driver_caps = i915->caps; |
|---|
| 1753 | 1741 | } |
|---|
| 1754 | 1742 | |
|---|
| 1755 | | -static __always_inline void dup_param(const char *type, void *x) |
|---|
| 1743 | +struct i915_gpu_coredump * |
|---|
| 1744 | +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) |
|---|
| 1756 | 1745 | { |
|---|
| 1757 | | - if (!__builtin_strcmp(type, "char *")) |
|---|
| 1758 | | - *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); |
|---|
| 1759 | | -} |
|---|
| 1746 | + struct i915_gpu_coredump *error; |
|---|
| 1760 | 1747 | |
|---|
| 1761 | | -static void capture_params(struct i915_gpu_state *error) |
|---|
| 1762 | | -{ |
|---|
| 1763 | | - error->params = i915_modparams; |
|---|
| 1764 | | -#define DUP(T, x, ...) dup_param(#T, &error->params.x); |
|---|
| 1765 | | - I915_PARAMS_FOR_EACH(DUP); |
|---|
| 1766 | | -#undef DUP |
|---|
| 1767 | | -} |
|---|
| 1748 | + if (!i915->params.error_capture) |
|---|
| 1749 | + return NULL; |
|---|
| 1768 | 1750 | |
|---|
| 1769 | | -static unsigned long capture_find_epoch(const struct i915_gpu_state *error) |
|---|
| 1770 | | -{ |
|---|
| 1771 | | - unsigned long epoch = error->capture; |
|---|
| 1772 | | - int i; |
|---|
| 1773 | | - |
|---|
| 1774 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
|---|
| 1775 | | - const struct drm_i915_error_engine *ee = &error->engine[i]; |
|---|
| 1776 | | - |
|---|
| 1777 | | - if (ee->hangcheck_stalled && |
|---|
| 1778 | | - time_before(ee->hangcheck_timestamp, epoch)) |
|---|
| 1779 | | - epoch = ee->hangcheck_timestamp; |
|---|
| 1780 | | - } |
|---|
| 1781 | | - |
|---|
| 1782 | | - return epoch; |
|---|
| 1783 | | -} |
|---|
| 1784 | | - |
|---|
| 1785 | | -static int capture(void *data) |
|---|
| 1786 | | -{ |
|---|
| 1787 | | - struct i915_gpu_state *error = data; |
|---|
| 1788 | | - |
|---|
| 1789 | | - error->time = ktime_get_real(); |
|---|
| 1790 | | - error->boottime = ktime_get_boottime(); |
|---|
| 1791 | | - error->uptime = ktime_sub(ktime_get(), |
|---|
| 1792 | | - error->i915->gt.last_init_time); |
|---|
| 1793 | | - error->capture = jiffies; |
|---|
| 1794 | | - |
|---|
| 1795 | | - capture_params(error); |
|---|
| 1796 | | - capture_gen_state(error); |
|---|
| 1797 | | - capture_uc_state(error); |
|---|
| 1798 | | - capture_reg_state(error); |
|---|
| 1799 | | - gem_record_fences(error); |
|---|
| 1800 | | - gem_record_rings(error); |
|---|
| 1801 | | - capture_active_buffers(error); |
|---|
| 1802 | | - capture_pinned_buffers(error); |
|---|
| 1803 | | - |
|---|
| 1804 | | - error->overlay = intel_overlay_capture_error_state(error->i915); |
|---|
| 1805 | | - error->display = intel_display_capture_error_state(error->i915); |
|---|
| 1806 | | - |
|---|
| 1807 | | - error->epoch = capture_find_epoch(error); |
|---|
| 1808 | | - |
|---|
| 1809 | | - return 0; |
|---|
| 1810 | | -} |
|---|
| 1811 | | - |
|---|
| 1812 | | -#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) |
|---|
| 1813 | | - |
|---|
| 1814 | | -struct i915_gpu_state * |
|---|
| 1815 | | -i915_capture_gpu_state(struct drm_i915_private *i915) |
|---|
| 1816 | | -{ |
|---|
| 1817 | | - struct i915_gpu_state *error; |
|---|
| 1818 | | - |
|---|
| 1819 | | - error = kzalloc(sizeof(*error), GFP_ATOMIC); |
|---|
| 1751 | + error = kzalloc(sizeof(*error), gfp); |
|---|
| 1820 | 1752 | if (!error) |
|---|
| 1821 | 1753 | return NULL; |
|---|
| 1822 | 1754 | |
|---|
| 1823 | 1755 | kref_init(&error->ref); |
|---|
| 1824 | 1756 | error->i915 = i915; |
|---|
| 1825 | 1757 | |
|---|
| 1826 | | - stop_machine(capture, error, NULL); |
|---|
| 1758 | + error->time = ktime_get_real(); |
|---|
| 1759 | + error->boottime = ktime_get_boottime(); |
|---|
| 1760 | + error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time); |
|---|
| 1761 | + error->capture = jiffies; |
|---|
| 1762 | + |
|---|
| 1763 | + capture_gen(error); |
|---|
| 1827 | 1764 | |
|---|
| 1828 | 1765 | return error; |
|---|
| 1766 | +} |
|---|
| 1767 | + |
|---|
| 1768 | +#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) |
|---|
| 1769 | + |
|---|
| 1770 | +struct intel_gt_coredump * |
|---|
| 1771 | +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) |
|---|
| 1772 | +{ |
|---|
| 1773 | + struct intel_gt_coredump *gc; |
|---|
| 1774 | + |
|---|
| 1775 | + gc = kzalloc(sizeof(*gc), gfp); |
|---|
| 1776 | + if (!gc) |
|---|
| 1777 | + return NULL; |
|---|
| 1778 | + |
|---|
| 1779 | + gc->_gt = gt; |
|---|
| 1780 | + gc->awake = intel_gt_pm_is_awake(gt); |
|---|
| 1781 | + |
|---|
| 1782 | + gt_record_regs(gc); |
|---|
| 1783 | + gt_record_fences(gc); |
|---|
| 1784 | + |
|---|
| 1785 | + return gc; |
|---|
| 1786 | +} |
|---|
| 1787 | + |
|---|
| 1788 | +struct i915_vma_compress * |
|---|
| 1789 | +i915_vma_capture_prepare(struct intel_gt_coredump *gt) |
|---|
| 1790 | +{ |
|---|
| 1791 | + struct i915_vma_compress *compress; |
|---|
| 1792 | + |
|---|
| 1793 | + compress = kmalloc(sizeof(*compress), ALLOW_FAIL); |
|---|
| 1794 | + if (!compress) |
|---|
| 1795 | + return NULL; |
|---|
| 1796 | + |
|---|
| 1797 | + if (!compress_init(compress)) { |
|---|
| 1798 | + kfree(compress); |
|---|
| 1799 | + return NULL; |
|---|
| 1800 | + } |
|---|
| 1801 | + |
|---|
| 1802 | + gt_capture_prepare(gt); |
|---|
| 1803 | + |
|---|
| 1804 | + return compress; |
|---|
| 1805 | +} |
|---|
| 1806 | + |
|---|
/*
 * Tear down the capture resources and compressor created by
 * i915_vma_capture_prepare(). A NULL @compress is a no-op, so callers may
 * pass the prepare() result through unconditionally.
 */
void i915_vma_capture_finish(struct intel_gt_coredump *gt,
			     struct i915_vma_compress *compress)
{
	if (compress) {
		gt_capture_finish(gt);
		compress_fini(compress);
		kfree(compress);
	}
}
|---|
| 1818 | + |
|---|
| 1819 | +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915) |
|---|
| 1820 | +{ |
|---|
| 1821 | + struct i915_gpu_coredump *error; |
|---|
| 1822 | + |
|---|
| 1823 | + /* Check if GPU capture has been disabled */ |
|---|
| 1824 | + error = READ_ONCE(i915->gpu_error.first_error); |
|---|
| 1825 | + if (IS_ERR(error)) |
|---|
| 1826 | + return error; |
|---|
| 1827 | + |
|---|
| 1828 | + error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL); |
|---|
| 1829 | + if (!error) |
|---|
| 1830 | + return ERR_PTR(-ENOMEM); |
|---|
| 1831 | + |
|---|
| 1832 | + error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL); |
|---|
| 1833 | + if (error->gt) { |
|---|
| 1834 | + struct i915_vma_compress *compress; |
|---|
| 1835 | + |
|---|
| 1836 | + compress = i915_vma_capture_prepare(error->gt); |
|---|
| 1837 | + if (!compress) { |
|---|
| 1838 | + kfree(error->gt); |
|---|
| 1839 | + kfree(error); |
|---|
| 1840 | + return ERR_PTR(-ENOMEM); |
|---|
| 1841 | + } |
|---|
| 1842 | + |
|---|
| 1843 | + gt_record_info(error->gt); |
|---|
| 1844 | + gt_record_engines(error->gt, compress); |
|---|
| 1845 | + |
|---|
| 1846 | + if (INTEL_INFO(i915)->has_gt_uc) |
|---|
| 1847 | + error->gt->uc = gt_record_uc(error->gt, compress); |
|---|
| 1848 | + |
|---|
| 1849 | + i915_vma_capture_finish(error->gt, compress); |
|---|
| 1850 | + |
|---|
| 1851 | + error->simulated |= error->gt->simulated; |
|---|
| 1852 | + } |
|---|
| 1853 | + |
|---|
| 1854 | + error->overlay = intel_overlay_capture_error_state(i915); |
|---|
| 1855 | + error->display = intel_display_capture_error_state(i915); |
|---|
| 1856 | + |
|---|
| 1857 | + return error; |
|---|
| 1858 | +} |
|---|
| 1859 | + |
|---|
| 1860 | +void i915_error_state_store(struct i915_gpu_coredump *error) |
|---|
| 1861 | +{ |
|---|
| 1862 | + struct drm_i915_private *i915; |
|---|
| 1863 | + static bool warned; |
|---|
| 1864 | + |
|---|
| 1865 | + if (IS_ERR_OR_NULL(error)) |
|---|
| 1866 | + return; |
|---|
| 1867 | + |
|---|
| 1868 | + i915 = error->i915; |
|---|
| 1869 | + drm_info(&i915->drm, "%s\n", error_msg(error)); |
|---|
| 1870 | + |
|---|
| 1871 | + if (error->simulated || |
|---|
| 1872 | + cmpxchg(&i915->gpu_error.first_error, NULL, error)) |
|---|
| 1873 | + return; |
|---|
| 1874 | + |
|---|
| 1875 | + i915_gpu_coredump_get(error); |
|---|
| 1876 | + |
|---|
| 1877 | + if (!xchg(&warned, true) && |
|---|
| 1878 | + ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { |
|---|
| 1879 | + pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); |
|---|
| 1880 | + pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n"); |
|---|
| 1881 | + pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n"); |
|---|
| 1882 | + pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); |
|---|
| 1883 | + pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); |
|---|
| 1884 | + pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", |
|---|
| 1885 | + i915->drm.primary->index); |
|---|
| 1886 | + } |
|---|
| 1829 | 1887 | } |
|---|
| 1830 | 1888 | |
|---|
| 1831 | 1889 | /** |
|---|
| 1832 | 1890 | * i915_capture_error_state - capture an error record for later analysis |
|---|
| 1833 | 1891 | * @i915: i915 device |
|---|
| 1834 | | - * @engine_mask: the mask of engines triggering the hang |
|---|
| 1835 | | - * @error_msg: a message to insert into the error capture header |
|---|
| 1836 | 1892 | * |
|---|
| 1837 | 1893 | * Should be called when an error is detected (either a hang or an error |
|---|
| 1838 | 1894 | * interrupt) to capture error state from the time of the error. Fills |
|---|
| 1839 | 1895 | * out a structure which becomes available in debugfs for user level tools |
|---|
| 1840 | 1896 | * to pick up. |
|---|
| 1841 | 1897 | */ |
|---|
| 1842 | | -void i915_capture_error_state(struct drm_i915_private *i915, |
|---|
| 1843 | | - u32 engine_mask, |
|---|
| 1844 | | - const char *error_msg) |
|---|
| 1898 | +void i915_capture_error_state(struct drm_i915_private *i915) |
|---|
| 1845 | 1899 | { |
|---|
| 1846 | | - static bool warned; |
|---|
| 1847 | | - struct i915_gpu_state *error; |
|---|
| 1848 | | - unsigned long flags; |
|---|
| 1900 | + struct i915_gpu_coredump *error; |
|---|
| 1849 | 1901 | |
|---|
| 1850 | | - if (!i915_modparams.error_capture) |
|---|
| 1851 | | - return; |
|---|
| 1852 | | - |
|---|
| 1853 | | - if (READ_ONCE(i915->gpu_error.first_error)) |
|---|
| 1854 | | - return; |
|---|
| 1855 | | - |
|---|
| 1856 | | - error = i915_capture_gpu_state(i915); |
|---|
| 1857 | | - if (!error) { |
|---|
| 1858 | | - DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); |
|---|
| 1902 | + error = i915_gpu_coredump(i915); |
|---|
| 1903 | + if (IS_ERR(error)) { |
|---|
| 1904 | + cmpxchg(&i915->gpu_error.first_error, NULL, error); |
|---|
| 1859 | 1905 | return; |
|---|
| 1860 | 1906 | } |
|---|
| 1861 | 1907 | |
|---|
| 1862 | | - i915_error_capture_msg(i915, error, engine_mask, error_msg); |
|---|
| 1863 | | - DRM_INFO("%s\n", error->error_msg); |
|---|
| 1864 | | - |
|---|
| 1865 | | - if (!error->simulated) { |
|---|
| 1866 | | - spin_lock_irqsave(&i915->gpu_error.lock, flags); |
|---|
| 1867 | | - if (!i915->gpu_error.first_error) { |
|---|
| 1868 | | - i915->gpu_error.first_error = error; |
|---|
| 1869 | | - error = NULL; |
|---|
| 1870 | | - } |
|---|
| 1871 | | - spin_unlock_irqrestore(&i915->gpu_error.lock, flags); |
|---|
| 1872 | | - } |
|---|
| 1873 | | - |
|---|
| 1874 | | - if (error) { |
|---|
| 1875 | | - __i915_gpu_state_free(&error->ref); |
|---|
| 1876 | | - return; |
|---|
| 1877 | | - } |
|---|
| 1878 | | - |
|---|
| 1879 | | - if (!warned && |
|---|
| 1880 | | - ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { |
|---|
| 1881 | | - DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); |
|---|
| 1882 | | - DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); |
|---|
| 1883 | | - DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); |
|---|
| 1884 | | - DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); |
|---|
| 1885 | | - DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", |
|---|
| 1886 | | - i915->drm.primary->index); |
|---|
| 1887 | | - warned = true; |
|---|
| 1888 | | - } |
|---|
| 1908 | + i915_error_state_store(error); |
|---|
| 1909 | + i915_gpu_coredump_put(error); |
|---|
| 1889 | 1910 | } |
|---|
| 1890 | 1911 | |
|---|
| 1891 | | -struct i915_gpu_state * |
|---|
| 1912 | +struct i915_gpu_coredump * |
|---|
| 1892 | 1913 | i915_first_error_state(struct drm_i915_private *i915) |
|---|
| 1893 | 1914 | { |
|---|
| 1894 | | - struct i915_gpu_state *error; |
|---|
| 1915 | + struct i915_gpu_coredump *error; |
|---|
| 1895 | 1916 | |
|---|
| 1896 | 1917 | spin_lock_irq(&i915->gpu_error.lock); |
|---|
| 1897 | 1918 | error = i915->gpu_error.first_error; |
|---|
| 1898 | | - if (error) |
|---|
| 1899 | | - i915_gpu_state_get(error); |
|---|
| 1919 | + if (!IS_ERR_OR_NULL(error)) |
|---|
| 1920 | + i915_gpu_coredump_get(error); |
|---|
| 1900 | 1921 | spin_unlock_irq(&i915->gpu_error.lock); |
|---|
| 1901 | 1922 | |
|---|
| 1902 | 1923 | return error; |
|---|
| .. | .. |
|---|
| 1904 | 1925 | |
|---|
| 1905 | 1926 | void i915_reset_error_state(struct drm_i915_private *i915) |
|---|
| 1906 | 1927 | { |
|---|
| 1907 | | - struct i915_gpu_state *error; |
|---|
| 1928 | + struct i915_gpu_coredump *error; |
|---|
| 1908 | 1929 | |
|---|
| 1909 | 1930 | spin_lock_irq(&i915->gpu_error.lock); |
|---|
| 1910 | 1931 | error = i915->gpu_error.first_error; |
|---|
| 1911 | | - i915->gpu_error.first_error = NULL; |
|---|
| 1932 | + if (error != ERR_PTR(-ENODEV)) /* if disabled, always disabled */ |
|---|
| 1933 | + i915->gpu_error.first_error = NULL; |
|---|
| 1912 | 1934 | spin_unlock_irq(&i915->gpu_error.lock); |
|---|
| 1913 | 1935 | |
|---|
| 1914 | | - i915_gpu_state_put(error); |
|---|
| 1936 | + if (!IS_ERR_OR_NULL(error)) |
|---|
| 1937 | + i915_gpu_coredump_put(error); |
|---|
| 1938 | +} |
|---|
| 1939 | + |
|---|
| 1940 | +void i915_disable_error_state(struct drm_i915_private *i915, int err) |
|---|
| 1941 | +{ |
|---|
| 1942 | + spin_lock_irq(&i915->gpu_error.lock); |
|---|
| 1943 | + if (!i915->gpu_error.first_error) |
|---|
| 1944 | + i915->gpu_error.first_error = ERR_PTR(err); |
|---|
| 1945 | + spin_unlock_irq(&i915->gpu_error.lock); |
|---|
| 1915 | 1946 | } |
|---|