.. | .. |
---|
27 | 27 | * |
---|
28 | 28 | */ |
---|
29 | 29 | |
---|
30 | | -#include <generated/utsrelease.h> |
---|
31 | | -#include <linux/stop_machine.h> |
---|
32 | | -#include <linux/zlib.h> |
---|
33 | | -#include <drm/drm_print.h> |
---|
34 | 30 | #include <linux/ascii85.h> |
---|
| 31 | +#include <linux/nmi.h> |
---|
| 32 | +#include <linux/pagevec.h> |
---|
| 33 | +#include <linux/scatterlist.h> |
---|
| 34 | +#include <linux/utsname.h> |
---|
| 35 | +#include <linux/zlib.h> |
---|
35 | 36 | |
---|
36 | | -#include "i915_gpu_error.h" |
---|
| 37 | +#include <drm/drm_print.h> |
---|
| 38 | + |
---|
| 39 | +#include "display/intel_atomic.h" |
---|
| 40 | +#include "display/intel_csr.h" |
---|
| 41 | +#include "display/intel_overlay.h" |
---|
| 42 | + |
---|
| 43 | +#include "gem/i915_gem_context.h" |
---|
| 44 | +#include "gem/i915_gem_lmem.h" |
---|
| 45 | +#include "gt/intel_gt.h" |
---|
| 46 | +#include "gt/intel_gt_pm.h" |
---|
| 47 | + |
---|
37 | 48 | #include "i915_drv.h" |
---|
| 49 | +#include "i915_gpu_error.h" |
---|
| 50 | +#include "i915_memcpy.h" |
---|
| 51 | +#include "i915_scatterlist.h" |
---|
38 | 52 | |
---|
39 | | -static inline const struct intel_engine_cs * |
---|
40 | | -engine_lookup(const struct drm_i915_private *i915, unsigned int id) |
---|
| 53 | +#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) |
---|
| 54 | +#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN) |
---|
| 55 | + |
---|
| 56 | +static void __sg_set_buf(struct scatterlist *sg, |
---|
| 57 | + void *addr, unsigned int len, loff_t it) |
---|
41 | 58 | { |
---|
42 | | - if (id >= I915_NUM_ENGINES) |
---|
43 | | - return NULL; |
---|
44 | | - |
---|
45 | | - return i915->engine[id]; |
---|
| 59 | + sg->page_link = (unsigned long)virt_to_page(addr); |
---|
| 60 | + sg->offset = offset_in_page(addr); |
---|
| 61 | + sg->length = len; |
---|
| 62 | + sg->dma_address = it; |
---|
46 | 63 | } |
---|
47 | 64 | |
---|
48 | | -static inline const char * |
---|
49 | | -__engine_name(const struct intel_engine_cs *engine) |
---|
| 65 | +static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len) |
---|
50 | 66 | { |
---|
51 | | - return engine ? engine->name : ""; |
---|
52 | | -} |
---|
53 | | - |
---|
54 | | -static const char * |
---|
55 | | -engine_name(const struct drm_i915_private *i915, unsigned int id) |
---|
56 | | -{ |
---|
57 | | - return __engine_name(engine_lookup(i915, id)); |
---|
58 | | -} |
---|
59 | | - |
---|
60 | | -static const char *tiling_flag(int tiling) |
---|
61 | | -{ |
---|
62 | | - switch (tiling) { |
---|
63 | | - default: |
---|
64 | | - case I915_TILING_NONE: return ""; |
---|
65 | | - case I915_TILING_X: return " X"; |
---|
66 | | - case I915_TILING_Y: return " Y"; |
---|
67 | | - } |
---|
68 | | -} |
---|
69 | | - |
---|
70 | | -static const char *dirty_flag(int dirty) |
---|
71 | | -{ |
---|
72 | | - return dirty ? " dirty" : ""; |
---|
73 | | -} |
---|
74 | | - |
---|
75 | | -static const char *purgeable_flag(int purgeable) |
---|
76 | | -{ |
---|
77 | | - return purgeable ? " purgeable" : ""; |
---|
78 | | -} |
---|
79 | | - |
---|
80 | | -static bool __i915_error_ok(struct drm_i915_error_state_buf *e) |
---|
81 | | -{ |
---|
82 | | - |
---|
83 | | - if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) { |
---|
84 | | - e->err = -ENOSPC; |
---|
85 | | - return false; |
---|
86 | | - } |
---|
87 | | - |
---|
88 | | - if (e->bytes == e->size - 1 || e->err) |
---|
| 67 | + if (!len) |
---|
89 | 68 | return false; |
---|
90 | 69 | |
---|
91 | | - return true; |
---|
92 | | -} |
---|
| 70 | + if (e->bytes + len + 1 <= e->size) |
---|
| 71 | + return true; |
---|
93 | 72 | |
---|
94 | | -static bool __i915_error_seek(struct drm_i915_error_state_buf *e, |
---|
95 | | - unsigned len) |
---|
96 | | -{ |
---|
97 | | - if (e->pos + len <= e->start) { |
---|
98 | | - e->pos += len; |
---|
99 | | - return false; |
---|
| 73 | + if (e->bytes) { |
---|
| 74 | + __sg_set_buf(e->cur++, e->buf, e->bytes, e->iter); |
---|
| 75 | + e->iter += e->bytes; |
---|
| 76 | + e->buf = NULL; |
---|
| 77 | + e->bytes = 0; |
---|
100 | 78 | } |
---|
101 | 79 | |
---|
102 | | - /* First vsnprintf needs to fit in its entirety for memmove */ |
---|
103 | | - if (len >= e->size) { |
---|
104 | | - e->err = -EIO; |
---|
105 | | - return false; |
---|
106 | | - } |
---|
| 80 | + if (e->cur == e->end) { |
---|
| 81 | + struct scatterlist *sgl; |
---|
107 | 82 | |
---|
108 | | - return true; |
---|
109 | | -} |
---|
110 | | - |
---|
111 | | -static void __i915_error_advance(struct drm_i915_error_state_buf *e, |
---|
112 | | - unsigned len) |
---|
113 | | -{ |
---|
114 | | - /* If this is first printf in this window, adjust it so that |
---|
115 | | - * start position matches start of the buffer |
---|
116 | | - */ |
---|
117 | | - |
---|
118 | | - if (e->pos < e->start) { |
---|
119 | | - const size_t off = e->start - e->pos; |
---|
120 | | - |
---|
121 | | - /* Should not happen but be paranoid */ |
---|
122 | | - if (off > len || e->bytes) { |
---|
123 | | - e->err = -EIO; |
---|
124 | | - return; |
---|
| 83 | + sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL); |
---|
| 84 | + if (!sgl) { |
---|
| 85 | + e->err = -ENOMEM; |
---|
| 86 | + return false; |
---|
125 | 87 | } |
---|
126 | 88 | |
---|
127 | | - memmove(e->buf, e->buf + off, len - off); |
---|
128 | | - e->bytes = len - off; |
---|
129 | | - e->pos = e->start; |
---|
130 | | - return; |
---|
| 89 | + if (e->cur) { |
---|
| 90 | + e->cur->offset = 0; |
---|
| 91 | + e->cur->length = 0; |
---|
| 92 | + e->cur->page_link = |
---|
| 93 | + (unsigned long)sgl | SG_CHAIN; |
---|
| 94 | + } else { |
---|
| 95 | + e->sgl = sgl; |
---|
| 96 | + } |
---|
| 97 | + |
---|
| 98 | + e->cur = sgl; |
---|
| 99 | + e->end = sgl + SG_MAX_SINGLE_ALLOC - 1; |
---|
131 | 100 | } |
---|
132 | 101 | |
---|
133 | | - e->bytes += len; |
---|
134 | | - e->pos += len; |
---|
| 102 | + e->size = ALIGN(len + 1, SZ_64K); |
---|
| 103 | + e->buf = kmalloc(e->size, ALLOW_FAIL); |
---|
| 104 | + if (!e->buf) { |
---|
| 105 | + e->size = PAGE_ALIGN(len + 1); |
---|
| 106 | + e->buf = kmalloc(e->size, GFP_KERNEL); |
---|
| 107 | + } |
---|
| 108 | + if (!e->buf) { |
---|
| 109 | + e->err = -ENOMEM; |
---|
| 110 | + return false; |
---|
| 111 | + } |
---|
| 112 | + |
---|
| 113 | + return true; |
---|
135 | 114 | } |
---|
136 | 115 | |
---|
137 | 116 | __printf(2, 0) |
---|
138 | 117 | static void i915_error_vprintf(struct drm_i915_error_state_buf *e, |
---|
139 | | - const char *f, va_list args) |
---|
| 118 | + const char *fmt, va_list args) |
---|
140 | 119 | { |
---|
141 | | - unsigned len; |
---|
| 120 | + va_list ap; |
---|
| 121 | + int len; |
---|
142 | 122 | |
---|
143 | | - if (!__i915_error_ok(e)) |
---|
| 123 | + if (e->err) |
---|
144 | 124 | return; |
---|
145 | 125 | |
---|
146 | | - /* Seek the first printf which is hits start position */ |
---|
147 | | - if (e->pos < e->start) { |
---|
148 | | - va_list tmp; |
---|
149 | | - |
---|
150 | | - va_copy(tmp, args); |
---|
151 | | - len = vsnprintf(NULL, 0, f, tmp); |
---|
152 | | - va_end(tmp); |
---|
153 | | - |
---|
154 | | - if (!__i915_error_seek(e, len)) |
---|
155 | | - return; |
---|
| 126 | + va_copy(ap, args); |
---|
| 127 | + len = vsnprintf(NULL, 0, fmt, ap); |
---|
| 128 | + va_end(ap); |
---|
| 129 | + if (len <= 0) { |
---|
| 130 | + e->err = len; |
---|
| 131 | + return; |
---|
156 | 132 | } |
---|
157 | 133 | |
---|
158 | | - len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args); |
---|
159 | | - if (len >= e->size - e->bytes) |
---|
160 | | - len = e->size - e->bytes - 1; |
---|
| 134 | + if (!__i915_error_grow(e, len)) |
---|
| 135 | + return; |
---|
161 | 136 | |
---|
162 | | - __i915_error_advance(e, len); |
---|
| 137 | + GEM_BUG_ON(e->bytes >= e->size); |
---|
| 138 | + len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args); |
---|
| 139 | + if (len < 0) { |
---|
| 140 | + e->err = len; |
---|
| 141 | + return; |
---|
| 142 | + } |
---|
| 143 | + e->bytes += len; |
---|
163 | 144 | } |
---|
164 | 145 | |
---|
165 | | -static void i915_error_puts(struct drm_i915_error_state_buf *e, |
---|
166 | | - const char *str) |
---|
| 146 | +static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str) |
---|
167 | 147 | { |
---|
168 | 148 | unsigned len; |
---|
169 | 149 | |
---|
170 | | - if (!__i915_error_ok(e)) |
---|
| 150 | + if (e->err || !str) |
---|
171 | 151 | return; |
---|
172 | 152 | |
---|
173 | 153 | len = strlen(str); |
---|
| 154 | + if (!__i915_error_grow(e, len)) |
---|
| 155 | + return; |
---|
174 | 156 | |
---|
175 | | - /* Seek the first printf which is hits start position */ |
---|
176 | | - if (e->pos < e->start) { |
---|
177 | | - if (!__i915_error_seek(e, len)) |
---|
178 | | - return; |
---|
179 | | - } |
---|
180 | | - |
---|
181 | | - if (len >= e->size - e->bytes) |
---|
182 | | - len = e->size - e->bytes - 1; |
---|
| 157 | + GEM_BUG_ON(e->bytes + len > e->size); |
---|
183 | 158 | memcpy(e->buf + e->bytes, str, len); |
---|
184 | | - |
---|
185 | | - __i915_error_advance(e, len); |
---|
| 159 | + e->bytes += len; |
---|
186 | 160 | } |
---|
187 | 161 | |
---|
188 | 162 | #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) |
---|
.. | .. |
---|
203 | 177 | return p; |
---|
204 | 178 | } |
---|
205 | 179 | |
---|
| 180 | +/* single threaded page allocator with a reserved stash for emergencies */ |
---|
| 181 | +static void pool_fini(struct pagevec *pv) |
---|
| 182 | +{ |
---|
| 183 | + pagevec_release(pv); |
---|
| 184 | +} |
---|
| 185 | + |
---|
| 186 | +static int pool_refill(struct pagevec *pv, gfp_t gfp) |
---|
| 187 | +{ |
---|
| 188 | + while (pagevec_space(pv)) { |
---|
| 189 | + struct page *p; |
---|
| 190 | + |
---|
| 191 | + p = alloc_page(gfp); |
---|
| 192 | + if (!p) |
---|
| 193 | + return -ENOMEM; |
---|
| 194 | + |
---|
| 195 | + pagevec_add(pv, p); |
---|
| 196 | + } |
---|
| 197 | + |
---|
| 198 | + return 0; |
---|
| 199 | +} |
---|
| 200 | + |
---|
| 201 | +static int pool_init(struct pagevec *pv, gfp_t gfp) |
---|
| 202 | +{ |
---|
| 203 | + int err; |
---|
| 204 | + |
---|
| 205 | + pagevec_init(pv); |
---|
| 206 | + |
---|
| 207 | + err = pool_refill(pv, gfp); |
---|
| 208 | + if (err) |
---|
| 209 | + pool_fini(pv); |
---|
| 210 | + |
---|
| 211 | + return err; |
---|
| 212 | +} |
---|
| 213 | + |
---|
| 214 | +static void *pool_alloc(struct pagevec *pv, gfp_t gfp) |
---|
| 215 | +{ |
---|
| 216 | + struct page *p; |
---|
| 217 | + |
---|
| 218 | + p = alloc_page(gfp); |
---|
| 219 | + if (!p && pagevec_count(pv)) |
---|
| 220 | + p = pv->pages[--pv->nr]; |
---|
| 221 | + |
---|
| 222 | + return p ? page_address(p) : NULL; |
---|
| 223 | +} |
---|
| 224 | + |
---|
| 225 | +static void pool_free(struct pagevec *pv, void *addr) |
---|
| 226 | +{ |
---|
| 227 | + struct page *p = virt_to_page(addr); |
---|
| 228 | + |
---|
| 229 | + if (pagevec_space(pv)) |
---|
| 230 | + pagevec_add(pv, p); |
---|
| 231 | + else |
---|
| 232 | + __free_page(p); |
---|
| 233 | +} |
---|
| 234 | + |
---|
206 | 235 | #ifdef CONFIG_DRM_I915_COMPRESS_ERROR |
---|
207 | 236 | |
---|
208 | | -struct compress { |
---|
| 237 | +struct i915_vma_compress { |
---|
| 238 | + struct pagevec pool; |
---|
209 | 239 | struct z_stream_s zstream; |
---|
210 | 240 | void *tmp; |
---|
211 | 241 | }; |
---|
212 | 242 | |
---|
213 | | -static bool compress_init(struct compress *c) |
---|
| 243 | +static bool compress_init(struct i915_vma_compress *c) |
---|
214 | 244 | { |
---|
215 | | - struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream)); |
---|
| 245 | + struct z_stream_s *zstream = &c->zstream; |
---|
| 246 | + |
---|
| 247 | + if (pool_init(&c->pool, ALLOW_FAIL)) |
---|
| 248 | + return false; |
---|
216 | 249 | |
---|
217 | 250 | zstream->workspace = |
---|
218 | 251 | kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), |
---|
219 | | - GFP_ATOMIC | __GFP_NOWARN); |
---|
220 | | - if (!zstream->workspace) |
---|
221 | | - return false; |
---|
222 | | - |
---|
223 | | - if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) { |
---|
224 | | - kfree(zstream->workspace); |
---|
| 252 | + ALLOW_FAIL); |
---|
| 253 | + if (!zstream->workspace) { |
---|
| 254 | + pool_fini(&c->pool); |
---|
225 | 255 | return false; |
---|
226 | 256 | } |
---|
227 | 257 | |
---|
228 | 258 | c->tmp = NULL; |
---|
229 | 259 | if (i915_has_memcpy_from_wc()) |
---|
230 | | - c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN); |
---|
| 260 | + c->tmp = pool_alloc(&c->pool, ALLOW_FAIL); |
---|
231 | 261 | |
---|
232 | 262 | return true; |
---|
233 | 263 | } |
---|
234 | 264 | |
---|
235 | | -static void *compress_next_page(struct drm_i915_error_object *dst) |
---|
| 265 | +static bool compress_start(struct i915_vma_compress *c) |
---|
236 | 266 | { |
---|
237 | | - unsigned long page; |
---|
| 267 | + struct z_stream_s *zstream = &c->zstream; |
---|
| 268 | + void *workspace = zstream->workspace; |
---|
| 269 | + |
---|
| 270 | + memset(zstream, 0, sizeof(*zstream)); |
---|
| 271 | + zstream->workspace = workspace; |
---|
| 272 | + |
---|
| 273 | + return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK; |
---|
| 274 | +} |
---|
| 275 | + |
---|
| 276 | +static void *compress_next_page(struct i915_vma_compress *c, |
---|
| 277 | + struct i915_vma_coredump *dst) |
---|
| 278 | +{ |
---|
| 279 | + void *page; |
---|
238 | 280 | |
---|
239 | 281 | if (dst->page_count >= dst->num_pages) |
---|
240 | 282 | return ERR_PTR(-ENOSPC); |
---|
241 | 283 | |
---|
242 | | - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); |
---|
| 284 | + page = pool_alloc(&c->pool, ALLOW_FAIL); |
---|
243 | 285 | if (!page) |
---|
244 | 286 | return ERR_PTR(-ENOMEM); |
---|
245 | 287 | |
---|
246 | | - return dst->pages[dst->page_count++] = (void *)page; |
---|
| 288 | + return dst->pages[dst->page_count++] = page; |
---|
247 | 289 | } |
---|
248 | 290 | |
---|
249 | | -static int compress_page(struct compress *c, |
---|
| 291 | +static int compress_page(struct i915_vma_compress *c, |
---|
250 | 292 | void *src, |
---|
251 | | - struct drm_i915_error_object *dst) |
---|
| 293 | + struct i915_vma_coredump *dst, |
---|
| 294 | + bool wc) |
---|
252 | 295 | { |
---|
253 | 296 | struct z_stream_s *zstream = &c->zstream; |
---|
254 | 297 | |
---|
255 | 298 | zstream->next_in = src; |
---|
256 | | - if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) |
---|
| 299 | + if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) |
---|
257 | 300 | zstream->next_in = c->tmp; |
---|
258 | 301 | zstream->avail_in = PAGE_SIZE; |
---|
259 | 302 | |
---|
260 | 303 | do { |
---|
261 | 304 | if (zstream->avail_out == 0) { |
---|
262 | | - zstream->next_out = compress_next_page(dst); |
---|
| 305 | + zstream->next_out = compress_next_page(c, dst); |
---|
263 | 306 | if (IS_ERR(zstream->next_out)) |
---|
264 | 307 | return PTR_ERR(zstream->next_out); |
---|
265 | 308 | |
---|
.. | .. |
---|
279 | 322 | return 0; |
---|
280 | 323 | } |
---|
281 | 324 | |
---|
282 | | -static int compress_flush(struct compress *c, |
---|
283 | | - struct drm_i915_error_object *dst) |
---|
| 325 | +static int compress_flush(struct i915_vma_compress *c, |
---|
| 326 | + struct i915_vma_coredump *dst) |
---|
284 | 327 | { |
---|
285 | 328 | struct z_stream_s *zstream = &c->zstream; |
---|
286 | 329 | |
---|
287 | 330 | do { |
---|
288 | 331 | switch (zlib_deflate(zstream, Z_FINISH)) { |
---|
289 | 332 | case Z_OK: /* more space requested */ |
---|
290 | | - zstream->next_out = compress_next_page(dst); |
---|
| 333 | + zstream->next_out = compress_next_page(c, dst); |
---|
291 | 334 | if (IS_ERR(zstream->next_out)) |
---|
292 | 335 | return PTR_ERR(zstream->next_out); |
---|
293 | 336 | |
---|
.. | .. |
---|
308 | 351 | return 0; |
---|
309 | 352 | } |
---|
310 | 353 | |
---|
311 | | -static void compress_fini(struct compress *c, |
---|
312 | | - struct drm_i915_error_object *dst) |
---|
| 354 | +static void compress_finish(struct i915_vma_compress *c) |
---|
313 | 355 | { |
---|
314 | | - struct z_stream_s *zstream = &c->zstream; |
---|
| 356 | + zlib_deflateEnd(&c->zstream); |
---|
| 357 | +} |
---|
315 | 358 | |
---|
316 | | - zlib_deflateEnd(zstream); |
---|
317 | | - kfree(zstream->workspace); |
---|
| 359 | +static void compress_fini(struct i915_vma_compress *c) |
---|
| 360 | +{ |
---|
| 361 | + kfree(c->zstream.workspace); |
---|
318 | 362 | if (c->tmp) |
---|
319 | | - free_page((unsigned long)c->tmp); |
---|
| 363 | + pool_free(&c->pool, c->tmp); |
---|
| 364 | + pool_fini(&c->pool); |
---|
320 | 365 | } |
---|
321 | 366 | |
---|
322 | 367 | static void err_compression_marker(struct drm_i915_error_state_buf *m) |
---|
.. | .. |
---|
326 | 371 | |
---|
327 | 372 | #else |
---|
328 | 373 | |
---|
329 | | -struct compress { |
---|
| 374 | +struct i915_vma_compress { |
---|
| 375 | + struct pagevec pool; |
---|
330 | 376 | }; |
---|
331 | 377 | |
---|
332 | | -static bool compress_init(struct compress *c) |
---|
| 378 | +static bool compress_init(struct i915_vma_compress *c) |
---|
| 379 | +{ |
---|
| 380 | + return pool_init(&c->pool, ALLOW_FAIL) == 0; |
---|
| 381 | +} |
---|
| 382 | + |
---|
| 383 | +static bool compress_start(struct i915_vma_compress *c) |
---|
333 | 384 | { |
---|
334 | 385 | return true; |
---|
335 | 386 | } |
---|
336 | 387 | |
---|
337 | | -static int compress_page(struct compress *c, |
---|
| 388 | +static int compress_page(struct i915_vma_compress *c, |
---|
338 | 389 | void *src, |
---|
339 | | - struct drm_i915_error_object *dst) |
---|
| 390 | + struct i915_vma_coredump *dst, |
---|
| 391 | + bool wc) |
---|
340 | 392 | { |
---|
341 | | - unsigned long page; |
---|
342 | 393 | void *ptr; |
---|
343 | 394 | |
---|
344 | | - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); |
---|
345 | | - if (!page) |
---|
| 395 | + ptr = pool_alloc(&c->pool, ALLOW_FAIL); |
---|
| 396 | + if (!ptr) |
---|
346 | 397 | return -ENOMEM; |
---|
347 | 398 | |
---|
348 | | - ptr = (void *)page; |
---|
349 | | - if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) |
---|
| 399 | + if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) |
---|
350 | 400 | memcpy(ptr, src, PAGE_SIZE); |
---|
351 | 401 | dst->pages[dst->page_count++] = ptr; |
---|
352 | 402 | cond_resched(); |
---|
.. | .. |
---|
354 | 404 | return 0; |
---|
355 | 405 | } |
---|
356 | 406 | |
---|
357 | | -static int compress_flush(struct compress *c, |
---|
358 | | - struct drm_i915_error_object *dst) |
---|
| 407 | +static int compress_flush(struct i915_vma_compress *c, |
---|
| 408 | + struct i915_vma_coredump *dst) |
---|
359 | 409 | { |
---|
360 | 410 | return 0; |
---|
361 | 411 | } |
---|
362 | 412 | |
---|
363 | | -static void compress_fini(struct compress *c, |
---|
364 | | - struct drm_i915_error_object *dst) |
---|
| 413 | +static void compress_finish(struct i915_vma_compress *c) |
---|
365 | 414 | { |
---|
| 415 | +} |
---|
| 416 | + |
---|
| 417 | +static void compress_fini(struct i915_vma_compress *c) |
---|
| 418 | +{ |
---|
| 419 | + pool_fini(&c->pool); |
---|
366 | 420 | } |
---|
367 | 421 | |
---|
368 | 422 | static void err_compression_marker(struct drm_i915_error_state_buf *m) |
---|
.. | .. |
---|
372 | 426 | |
---|
373 | 427 | #endif |
---|
374 | 428 | |
---|
375 | | -static void print_error_buffers(struct drm_i915_error_state_buf *m, |
---|
376 | | - const char *name, |
---|
377 | | - struct drm_i915_error_buffer *err, |
---|
378 | | - int count) |
---|
379 | | -{ |
---|
380 | | - err_printf(m, "%s [%d]:\n", name, count); |
---|
381 | | - |
---|
382 | | - while (count--) { |
---|
383 | | - err_printf(m, " %08x_%08x %8u %02x %02x %02x", |
---|
384 | | - upper_32_bits(err->gtt_offset), |
---|
385 | | - lower_32_bits(err->gtt_offset), |
---|
386 | | - err->size, |
---|
387 | | - err->read_domains, |
---|
388 | | - err->write_domain, |
---|
389 | | - err->wseqno); |
---|
390 | | - err_puts(m, tiling_flag(err->tiling)); |
---|
391 | | - err_puts(m, dirty_flag(err->dirty)); |
---|
392 | | - err_puts(m, purgeable_flag(err->purgeable)); |
---|
393 | | - err_puts(m, err->userptr ? " userptr" : ""); |
---|
394 | | - err_puts(m, err->engine != -1 ? " " : ""); |
---|
395 | | - err_puts(m, engine_name(m->i915, err->engine)); |
---|
396 | | - err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); |
---|
397 | | - |
---|
398 | | - if (err->name) |
---|
399 | | - err_printf(m, " (name: %d)", err->name); |
---|
400 | | - if (err->fence_reg != I915_FENCE_REG_NONE) |
---|
401 | | - err_printf(m, " (fence: %d)", err->fence_reg); |
---|
402 | | - |
---|
403 | | - err_puts(m, "\n"); |
---|
404 | | - err++; |
---|
405 | | - } |
---|
406 | | -} |
---|
407 | | - |
---|
408 | 429 | static void error_print_instdone(struct drm_i915_error_state_buf *m, |
---|
409 | | - const struct drm_i915_error_engine *ee) |
---|
| 430 | + const struct intel_engine_coredump *ee) |
---|
410 | 431 | { |
---|
| 432 | + const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; |
---|
411 | 433 | int slice; |
---|
412 | 434 | int subslice; |
---|
413 | 435 | |
---|
414 | 436 | err_printf(m, " INSTDONE: 0x%08x\n", |
---|
415 | 437 | ee->instdone.instdone); |
---|
416 | 438 | |
---|
417 | | - if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3) |
---|
| 439 | + if (ee->engine->class != RENDER_CLASS || INTEL_GEN(m->i915) <= 3) |
---|
418 | 440 | return; |
---|
419 | 441 | |
---|
420 | 442 | err_printf(m, " SC_INSTDONE: 0x%08x\n", |
---|
.. | .. |
---|
423 | 445 | if (INTEL_GEN(m->i915) <= 6) |
---|
424 | 446 | return; |
---|
425 | 447 | |
---|
426 | | - for_each_instdone_slice_subslice(m->i915, slice, subslice) |
---|
| 448 | + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) |
---|
427 | 449 | err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", |
---|
428 | 450 | slice, subslice, |
---|
429 | 451 | ee->instdone.sampler[slice][subslice]); |
---|
430 | 452 | |
---|
431 | | - for_each_instdone_slice_subslice(m->i915, slice, subslice) |
---|
| 453 | + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) |
---|
432 | 454 | err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", |
---|
433 | 455 | slice, subslice, |
---|
434 | 456 | ee->instdone.row[slice][subslice]); |
---|
435 | | -} |
---|
436 | 457 | |
---|
437 | | -static const char *bannable(const struct drm_i915_error_context *ctx) |
---|
438 | | -{ |
---|
439 | | - return ctx->bannable ? "" : " (unbannable)"; |
---|
| 458 | + if (INTEL_GEN(m->i915) < 12) |
---|
| 459 | + return; |
---|
| 460 | + |
---|
| 461 | + err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n", |
---|
| 462 | + ee->instdone.slice_common_extra[0]); |
---|
| 463 | + err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n", |
---|
| 464 | + ee->instdone.slice_common_extra[1]); |
---|
440 | 465 | } |
---|
441 | 466 | |
---|
442 | 467 | static void error_print_request(struct drm_i915_error_state_buf *m, |
---|
443 | 468 | const char *prefix, |
---|
444 | | - const struct drm_i915_error_request *erq, |
---|
445 | | - const unsigned long epoch) |
---|
| 469 | + const struct i915_request_coredump *erq) |
---|
446 | 470 | { |
---|
447 | 471 | if (!erq->seqno) |
---|
448 | 472 | return; |
---|
449 | 473 | |
---|
450 | | - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", |
---|
451 | | - prefix, erq->pid, erq->ban_score, |
---|
452 | | - erq->context, erq->seqno, erq->sched_attr.priority, |
---|
453 | | - jiffies_to_msecs(erq->jiffies - epoch), |
---|
454 | | - erq->start, erq->head, erq->tail); |
---|
| 474 | + err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, head %08x, tail %08x\n", |
---|
| 475 | + prefix, erq->pid, erq->context, erq->seqno, |
---|
| 476 | + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, |
---|
| 477 | + &erq->flags) ? "!" : "", |
---|
| 478 | + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, |
---|
| 479 | + &erq->flags) ? "+" : "", |
---|
| 480 | + erq->sched_attr.priority, |
---|
| 481 | + erq->head, erq->tail); |
---|
455 | 482 | } |
---|
456 | 483 | |
---|
457 | 484 | static void error_print_context(struct drm_i915_error_state_buf *m, |
---|
458 | 485 | const char *header, |
---|
459 | | - const struct drm_i915_error_context *ctx) |
---|
| 486 | + const struct i915_gem_context_coredump *ctx) |
---|
460 | 487 | { |
---|
461 | | - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", |
---|
462 | | - header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, |
---|
463 | | - ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), |
---|
464 | | - ctx->guilty, ctx->active); |
---|
| 488 | + const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns; |
---|
| 489 | + |
---|
| 490 | + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", |
---|
| 491 | + header, ctx->comm, ctx->pid, ctx->sched_attr.priority, |
---|
| 492 | + ctx->guilty, ctx->active, |
---|
| 493 | + ctx->total_runtime * period, |
---|
| 494 | + mul_u32_u32(ctx->avg_runtime, period)); |
---|
| 495 | +} |
---|
| 496 | + |
---|
| 497 | +static struct i915_vma_coredump * |
---|
| 498 | +__find_vma(struct i915_vma_coredump *vma, const char *name) |
---|
| 499 | +{ |
---|
| 500 | + while (vma) { |
---|
| 501 | + if (strcmp(vma->name, name) == 0) |
---|
| 502 | + return vma; |
---|
| 503 | + vma = vma->next; |
---|
| 504 | + } |
---|
| 505 | + |
---|
| 506 | + return NULL; |
---|
| 507 | +} |
---|
| 508 | + |
---|
| 509 | +static struct i915_vma_coredump * |
---|
| 510 | +find_batch(const struct intel_engine_coredump *ee) |
---|
| 511 | +{ |
---|
| 512 | + return __find_vma(ee->vma, "batch"); |
---|
465 | 513 | } |
---|
466 | 514 | |
---|
467 | 515 | static void error_print_engine(struct drm_i915_error_state_buf *m, |
---|
468 | | - const struct drm_i915_error_engine *ee, |
---|
469 | | - const unsigned long epoch) |
---|
| 516 | + const struct intel_engine_coredump *ee) |
---|
470 | 517 | { |
---|
| 518 | + struct i915_vma_coredump *batch; |
---|
471 | 519 | int n; |
---|
472 | 520 | |
---|
473 | | - err_printf(m, "%s command stream:\n", |
---|
474 | | - engine_name(m->i915, ee->engine_id)); |
---|
475 | | - err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); |
---|
| 521 | + err_printf(m, "%s command stream:\n", ee->engine->name); |
---|
| 522 | + err_printf(m, " CCID: 0x%08x\n", ee->ccid); |
---|
476 | 523 | err_printf(m, " START: 0x%08x\n", ee->start); |
---|
477 | 524 | err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); |
---|
478 | 525 | err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n", |
---|
.. | .. |
---|
484 | 531 | (u32)(ee->acthd>>32), (u32)ee->acthd); |
---|
485 | 532 | err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); |
---|
486 | 533 | err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); |
---|
| 534 | + err_printf(m, " ESR: 0x%08x\n", ee->esr); |
---|
487 | 535 | |
---|
488 | 536 | error_print_instdone(m, ee); |
---|
489 | 537 | |
---|
490 | | - if (ee->batchbuffer) { |
---|
491 | | - u64 start = ee->batchbuffer->gtt_offset; |
---|
492 | | - u64 end = start + ee->batchbuffer->gtt_size; |
---|
| 538 | + batch = find_batch(ee); |
---|
| 539 | + if (batch) { |
---|
| 540 | + u64 start = batch->gtt_offset; |
---|
| 541 | + u64 end = start + batch->gtt_size; |
---|
493 | 542 | |
---|
494 | 543 | err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n", |
---|
495 | 544 | upper_32_bits(start), lower_32_bits(start), |
---|
.. | .. |
---|
507 | 556 | if (INTEL_GEN(m->i915) >= 6) { |
---|
508 | 557 | err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); |
---|
509 | 558 | err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); |
---|
510 | | - err_printf(m, " SYNC_0: 0x%08x\n", |
---|
511 | | - ee->semaphore_mboxes[0]); |
---|
512 | | - err_printf(m, " SYNC_1: 0x%08x\n", |
---|
513 | | - ee->semaphore_mboxes[1]); |
---|
514 | | - if (HAS_VEBOX(m->i915)) |
---|
515 | | - err_printf(m, " SYNC_2: 0x%08x\n", |
---|
516 | | - ee->semaphore_mboxes[2]); |
---|
517 | 559 | } |
---|
518 | | - if (USES_PPGTT(m->i915)) { |
---|
| 560 | + if (HAS_PPGTT(m->i915)) { |
---|
519 | 561 | err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); |
---|
520 | 562 | |
---|
521 | 563 | if (INTEL_GEN(m->i915) >= 8) { |
---|
.. | .. |
---|
528 | 570 | ee->vm_info.pp_dir_base); |
---|
529 | 571 | } |
---|
530 | 572 | } |
---|
531 | | - err_printf(m, " seqno: 0x%08x\n", ee->seqno); |
---|
532 | | - err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); |
---|
533 | | - err_printf(m, " waiting: %s\n", yesno(ee->waiting)); |
---|
534 | | - err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); |
---|
535 | | - err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); |
---|
536 | | - err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); |
---|
537 | | - err_printf(m, " hangcheck action: %s\n", |
---|
538 | | - hangcheck_action_to_str(ee->hangcheck_action)); |
---|
539 | | - err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", |
---|
540 | | - jiffies_to_msecs(ee->hangcheck_timestamp - epoch), |
---|
541 | | - ee->hangcheck_timestamp, |
---|
542 | | - ee->hangcheck_timestamp == epoch ? "; epoch" : ""); |
---|
543 | 573 | err_printf(m, " engine reset count: %u\n", ee->reset_count); |
---|
544 | 574 | |
---|
545 | 575 | for (n = 0; n < ee->num_ports; n++) { |
---|
546 | 576 | err_printf(m, " ELSP[%d]:", n); |
---|
547 | | - error_print_request(m, " ", &ee->execlist[n], epoch); |
---|
| 577 | + error_print_request(m, " ", &ee->execlist[n]); |
---|
548 | 578 | } |
---|
549 | 579 | |
---|
550 | 580 | error_print_context(m, " Active context: ", &ee->context); |
---|
.. | .. |
---|
559 | 589 | va_end(args); |
---|
560 | 590 | } |
---|
561 | 591 | |
---|
562 | | -static void print_error_obj(struct drm_i915_error_state_buf *m, |
---|
563 | | - struct intel_engine_cs *engine, |
---|
564 | | - const char *name, |
---|
565 | | - struct drm_i915_error_object *obj) |
---|
| 592 | +static void print_error_vma(struct drm_i915_error_state_buf *m, |
---|
| 593 | + const struct intel_engine_cs *engine, |
---|
| 594 | + const struct i915_vma_coredump *vma) |
---|
566 | 595 | { |
---|
567 | 596 | char out[ASCII85_BUFSZ]; |
---|
568 | 597 | int page; |
---|
569 | 598 | |
---|
570 | | - if (!obj) |
---|
| 599 | + if (!vma) |
---|
571 | 600 | return; |
---|
572 | 601 | |
---|
573 | | - if (name) { |
---|
574 | | - err_printf(m, "%s --- %s = 0x%08x %08x\n", |
---|
575 | | - engine ? engine->name : "global", name, |
---|
576 | | - upper_32_bits(obj->gtt_offset), |
---|
577 | | - lower_32_bits(obj->gtt_offset)); |
---|
578 | | - } |
---|
| 602 | + err_printf(m, "%s --- %s = 0x%08x %08x\n", |
---|
| 603 | + engine ? engine->name : "global", vma->name, |
---|
| 604 | + upper_32_bits(vma->gtt_offset), |
---|
| 605 | + lower_32_bits(vma->gtt_offset)); |
---|
| 606 | + |
---|
| 607 | + if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) |
---|
| 608 | + err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes); |
---|
579 | 609 | |
---|
580 | 610 | err_compression_marker(m); |
---|
581 | | - for (page = 0; page < obj->page_count; page++) { |
---|
| 611 | + for (page = 0; page < vma->page_count; page++) { |
---|
582 | 612 | int i, len; |
---|
583 | 613 | |
---|
584 | 614 | len = PAGE_SIZE; |
---|
585 | | - if (page == obj->page_count - 1) |
---|
586 | | - len -= obj->unused; |
---|
| 615 | + if (page == vma->page_count - 1) |
---|
| 616 | + len -= vma->unused; |
---|
587 | 617 | len = ascii85_encode_len(len); |
---|
588 | 618 | |
---|
589 | 619 | for (i = 0; i < len; i++) |
---|
590 | | - err_puts(m, ascii85_encode(obj->pages[page][i], out)); |
---|
| 620 | + err_puts(m, ascii85_encode(vma->pages[page][i], out)); |
---|
591 | 621 | } |
---|
592 | 622 | err_puts(m, "\n"); |
---|
593 | 623 | } |
---|
594 | 624 | |
---|
595 | 625 | static void err_print_capabilities(struct drm_i915_error_state_buf *m, |
---|
596 | | - const struct intel_device_info *info, |
---|
597 | | - const struct intel_driver_caps *caps) |
---|
| 626 | + struct i915_gpu_coredump *error) |
---|
598 | 627 | { |
---|
599 | 628 | struct drm_printer p = i915_error_printer(m); |
---|
600 | 629 | |
---|
601 | | - intel_device_info_dump_flags(info, &p); |
---|
602 | | - intel_driver_caps_print(caps, &p); |
---|
603 | | - intel_device_info_dump_topology(&info->sseu, &p); |
---|
| 630 | + intel_device_info_print_static(&error->device_info, &p); |
---|
| 631 | + intel_device_info_print_runtime(&error->runtime_info, &p); |
---|
| 632 | + intel_driver_caps_print(&error->driver_caps, &p); |
---|
604 | 633 | } |
---|
605 | 634 | |
---|
606 | 635 | static void err_print_params(struct drm_i915_error_state_buf *m, |
---|
.. | .. |
---|
624 | 653 | } |
---|
625 | 654 | |
---|
626 | 655 | static void err_print_uc(struct drm_i915_error_state_buf *m, |
---|
627 | | - const struct i915_error_uc *error_uc) |
---|
| 656 | + const struct intel_uc_coredump *error_uc) |
---|
628 | 657 | { |
---|
629 | 658 | struct drm_printer p = i915_error_printer(m); |
---|
630 | | - const struct i915_gpu_state *error = |
---|
631 | | - container_of(error_uc, typeof(*error), uc); |
---|
632 | | - |
---|
633 | | - if (!error->device_info.has_guc) |
---|
634 | | - return; |
---|
635 | 659 | |
---|
636 | 660 | intel_uc_fw_dump(&error_uc->guc_fw, &p); |
---|
637 | 661 | intel_uc_fw_dump(&error_uc->huc_fw, &p); |
---|
638 | | - print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); |
---|
| 662 | + print_error_vma(m, NULL, error_uc->guc_log); |
---|
639 | 663 | } |
---|
640 | 664 | |
---|
641 | | -int i915_error_state_to_str(struct drm_i915_error_state_buf *m, |
---|
642 | | - const struct i915_gpu_state *error) |
---|
| 665 | +static void err_free_sgl(struct scatterlist *sgl) |
---|
643 | 666 | { |
---|
644 | | - struct drm_i915_private *dev_priv = m->i915; |
---|
645 | | - struct drm_i915_error_object *obj; |
---|
646 | | - struct timespec64 ts; |
---|
647 | | - int i, j; |
---|
| 667 | + while (sgl) { |
---|
| 668 | + struct scatterlist *sg; |
---|
648 | 669 | |
---|
649 | | - if (!error) { |
---|
650 | | - err_printf(m, "No error state collected\n"); |
---|
651 | | - return 0; |
---|
| 670 | + for (sg = sgl; !sg_is_chain(sg); sg++) { |
---|
| 671 | + kfree(sg_virt(sg)); |
---|
| 672 | + if (sg_is_last(sg)) |
---|
| 673 | + break; |
---|
| 674 | + } |
---|
| 675 | + |
---|
| 676 | + sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg); |
---|
| 677 | + free_page((unsigned long)sgl); |
---|
| 678 | + sgl = sg; |
---|
652 | 679 | } |
---|
| 680 | +} |
---|
| 681 | + |
---|
| 682 | +static void err_print_gt_info(struct drm_i915_error_state_buf *m, |
---|
| 683 | + struct intel_gt_coredump *gt) |
---|
| 684 | +{ |
---|
| 685 | + struct drm_printer p = i915_error_printer(m); |
---|
| 686 | + |
---|
| 687 | + intel_gt_info_print(>->info, &p); |
---|
| 688 | + intel_sseu_print_topology(>->info.sseu, &p); |
---|
| 689 | +} |
---|
| 690 | + |
---|
| 691 | +static void err_print_gt(struct drm_i915_error_state_buf *m, |
---|
| 692 | + struct intel_gt_coredump *gt) |
---|
| 693 | +{ |
---|
| 694 | + const struct intel_engine_coredump *ee; |
---|
| 695 | + int i; |
---|
| 696 | + |
---|
| 697 | + err_printf(m, "GT awake: %s\n", yesno(gt->awake)); |
---|
| 698 | + err_printf(m, "EIR: 0x%08x\n", gt->eir); |
---|
| 699 | + err_printf(m, "IER: 0x%08x\n", gt->ier); |
---|
| 700 | + for (i = 0; i < gt->ngtier; i++) |
---|
| 701 | + err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]); |
---|
| 702 | + err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); |
---|
| 703 | + err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake); |
---|
| 704 | + err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); |
---|
| 705 | + |
---|
| 706 | + for (i = 0; i < gt->nfence; i++) |
---|
| 707 | + err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]); |
---|
| 708 | + |
---|
| 709 | + if (IS_GEN_RANGE(m->i915, 6, 11)) { |
---|
| 710 | + err_printf(m, "ERROR: 0x%08x\n", gt->error); |
---|
| 711 | + err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg); |
---|
| 712 | + } |
---|
| 713 | + |
---|
| 714 | + if (INTEL_GEN(m->i915) >= 8) |
---|
| 715 | + err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", |
---|
| 716 | + gt->fault_data1, gt->fault_data0); |
---|
| 717 | + |
---|
| 718 | + if (IS_GEN(m->i915, 7)) |
---|
| 719 | + err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int); |
---|
| 720 | + |
---|
| 721 | + if (IS_GEN_RANGE(m->i915, 8, 11)) |
---|
| 722 | + err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache); |
---|
| 723 | + |
---|
| 724 | + if (IS_GEN(m->i915, 12)) |
---|
| 725 | + err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err); |
---|
| 726 | + |
---|
| 727 | + if (INTEL_GEN(m->i915) >= 12) { |
---|
| 728 | + int i; |
---|
| 729 | + |
---|
| 730 | + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { |
---|
| 731 | + /* |
---|
| 732 | + * SFC_DONE resides in the VD forcewake domain, so it |
---|
| 733 | + * only exists if the corresponding VCS engine is |
---|
| 734 | + * present. |
---|
| 735 | + */ |
---|
| 736 | + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) |
---|
| 737 | + continue; |
---|
| 738 | + |
---|
| 739 | + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, |
---|
| 740 | + gt->sfc_done[i]); |
---|
| 741 | + } |
---|
| 742 | + |
---|
| 743 | + err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); |
---|
| 744 | + } |
---|
| 745 | + |
---|
| 746 | + for (ee = gt->engine; ee; ee = ee->next) { |
---|
| 747 | + const struct i915_vma_coredump *vma; |
---|
| 748 | + |
---|
| 749 | + error_print_engine(m, ee); |
---|
| 750 | + for (vma = ee->vma; vma; vma = vma->next) |
---|
| 751 | + print_error_vma(m, ee->engine, vma); |
---|
| 752 | + } |
---|
| 753 | + |
---|
| 754 | + if (gt->uc) |
---|
| 755 | + err_print_uc(m, gt->uc); |
---|
| 756 | + |
---|
| 757 | + err_print_gt_info(m, gt); |
---|
| 758 | +} |
---|
| 759 | + |
---|
| 760 | +static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, |
---|
| 761 | + struct i915_gpu_coredump *error) |
---|
| 762 | +{ |
---|
| 763 | + const struct intel_engine_coredump *ee; |
---|
| 764 | + struct timespec64 ts; |
---|
653 | 765 | |
---|
654 | 766 | if (*error->error_msg) |
---|
655 | 767 | err_printf(m, "%s\n", error->error_msg); |
---|
656 | | - err_printf(m, "Kernel: " UTS_RELEASE "\n"); |
---|
| 768 | + err_printf(m, "Kernel: %s %s\n", |
---|
| 769 | + init_utsname()->release, |
---|
| 770 | + init_utsname()->machine); |
---|
| 771 | + err_printf(m, "Driver: %s\n", DRIVER_DATE); |
---|
657 | 772 | ts = ktime_to_timespec64(error->time); |
---|
658 | 773 | err_printf(m, "Time: %lld s %ld us\n", |
---|
659 | 774 | (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); |
---|
.. | .. |
---|
663 | 778 | ts = ktime_to_timespec64(error->uptime); |
---|
664 | 779 | err_printf(m, "Uptime: %lld s %ld us\n", |
---|
665 | 780 | (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); |
---|
666 | | - err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); |
---|
667 | | - err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", |
---|
668 | | - error->capture, |
---|
669 | | - jiffies_to_msecs(jiffies - error->capture), |
---|
670 | | - jiffies_to_msecs(error->capture - error->epoch)); |
---|
| 781 | + err_printf(m, "Capture: %lu jiffies; %d ms ago\n", |
---|
| 782 | + error->capture, jiffies_to_msecs(jiffies - error->capture)); |
---|
671 | 783 | |
---|
672 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
---|
673 | | - if (error->engine[i].hangcheck_stalled && |
---|
674 | | - error->engine[i].context.pid) { |
---|
675 | | - err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", |
---|
676 | | - engine_name(m->i915, i), |
---|
677 | | - error->engine[i].context.comm, |
---|
678 | | - error->engine[i].context.pid, |
---|
679 | | - error->engine[i].context.ban_score, |
---|
680 | | - bannable(&error->engine[i].context)); |
---|
681 | | - } |
---|
682 | | - } |
---|
| 784 | + for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next) |
---|
| 785 | + err_printf(m, "Active process (on ring %s): %s [%d]\n", |
---|
| 786 | + ee->engine->name, |
---|
| 787 | + ee->context.comm, |
---|
| 788 | + ee->context.pid); |
---|
| 789 | + |
---|
683 | 790 | err_printf(m, "Reset count: %u\n", error->reset_count); |
---|
684 | 791 | err_printf(m, "Suspend count: %u\n", error->suspend_count); |
---|
685 | 792 | err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); |
---|
686 | | - err_print_pciid(m, error->i915); |
---|
| 793 | + err_printf(m, "Subplatform: 0x%x\n", |
---|
| 794 | + intel_subplatform(&error->runtime_info, |
---|
| 795 | + error->device_info.platform)); |
---|
| 796 | + err_print_pciid(m, m->i915); |
---|
687 | 797 | |
---|
688 | 798 | err_printf(m, "IOMMU enabled?: %d\n", error->iommu); |
---|
689 | 799 | |
---|
690 | | - if (HAS_CSR(dev_priv)) { |
---|
691 | | - struct intel_csr *csr = &dev_priv->csr; |
---|
| 800 | + if (HAS_CSR(m->i915)) { |
---|
| 801 | + struct intel_csr *csr = &m->i915->csr; |
---|
692 | 802 | |
---|
693 | 803 | err_printf(m, "DMC loaded: %s\n", |
---|
694 | 804 | yesno(csr->dmc_payload != NULL)); |
---|
.. | .. |
---|
697 | 807 | CSR_VERSION_MINOR(csr->version)); |
---|
698 | 808 | } |
---|
699 | 809 | |
---|
700 | | - err_printf(m, "GT awake: %s\n", yesno(error->awake)); |
---|
701 | 810 | err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); |
---|
702 | 811 | err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); |
---|
703 | | - err_printf(m, "EIR: 0x%08x\n", error->eir); |
---|
704 | | - err_printf(m, "IER: 0x%08x\n", error->ier); |
---|
705 | | - for (i = 0; i < error->ngtier; i++) |
---|
706 | | - err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); |
---|
707 | | - err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); |
---|
708 | | - err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); |
---|
709 | | - err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); |
---|
710 | | - err_printf(m, "CCID: 0x%08x\n", error->ccid); |
---|
711 | | - err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); |
---|
712 | 812 | |
---|
713 | | - for (i = 0; i < error->nfence; i++) |
---|
714 | | - err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); |
---|
715 | | - |
---|
716 | | - if (INTEL_GEN(dev_priv) >= 6) { |
---|
717 | | - err_printf(m, "ERROR: 0x%08x\n", error->error); |
---|
718 | | - |
---|
719 | | - if (INTEL_GEN(dev_priv) >= 8) |
---|
720 | | - err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", |
---|
721 | | - error->fault_data1, error->fault_data0); |
---|
722 | | - |
---|
723 | | - err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); |
---|
724 | | - } |
---|
725 | | - |
---|
726 | | - if (IS_GEN7(dev_priv)) |
---|
727 | | - err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); |
---|
728 | | - |
---|
729 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
---|
730 | | - if (error->engine[i].engine_id != -1) |
---|
731 | | - error_print_engine(m, &error->engine[i], error->epoch); |
---|
732 | | - } |
---|
733 | | - |
---|
734 | | - for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { |
---|
735 | | - char buf[128]; |
---|
736 | | - int len, first = 1; |
---|
737 | | - |
---|
738 | | - if (!error->active_vm[i]) |
---|
739 | | - break; |
---|
740 | | - |
---|
741 | | - len = scnprintf(buf, sizeof(buf), "Active ("); |
---|
742 | | - for (j = 0; j < ARRAY_SIZE(error->engine); j++) { |
---|
743 | | - if (error->engine[j].vm != error->active_vm[i]) |
---|
744 | | - continue; |
---|
745 | | - |
---|
746 | | - len += scnprintf(buf + len, sizeof(buf), "%s%s", |
---|
747 | | - first ? "" : ", ", |
---|
748 | | - dev_priv->engine[j]->name); |
---|
749 | | - first = 0; |
---|
750 | | - } |
---|
751 | | - scnprintf(buf + len, sizeof(buf), ")"); |
---|
752 | | - print_error_buffers(m, buf, |
---|
753 | | - error->active_bo[i], |
---|
754 | | - error->active_bo_count[i]); |
---|
755 | | - } |
---|
756 | | - |
---|
757 | | - print_error_buffers(m, "Pinned (global)", |
---|
758 | | - error->pinned_bo, |
---|
759 | | - error->pinned_bo_count); |
---|
760 | | - |
---|
761 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
---|
762 | | - const struct drm_i915_error_engine *ee = &error->engine[i]; |
---|
763 | | - |
---|
764 | | - obj = ee->batchbuffer; |
---|
765 | | - if (obj) { |
---|
766 | | - err_puts(m, dev_priv->engine[i]->name); |
---|
767 | | - if (ee->context.pid) |
---|
768 | | - err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)", |
---|
769 | | - ee->context.comm, |
---|
770 | | - ee->context.pid, |
---|
771 | | - ee->context.handle, |
---|
772 | | - ee->context.hw_id, |
---|
773 | | - ee->context.ban_score, |
---|
774 | | - bannable(&ee->context)); |
---|
775 | | - err_printf(m, " --- gtt_offset = 0x%08x %08x\n", |
---|
776 | | - upper_32_bits(obj->gtt_offset), |
---|
777 | | - lower_32_bits(obj->gtt_offset)); |
---|
778 | | - print_error_obj(m, dev_priv->engine[i], NULL, obj); |
---|
779 | | - } |
---|
780 | | - |
---|
781 | | - for (j = 0; j < ee->user_bo_count; j++) |
---|
782 | | - print_error_obj(m, dev_priv->engine[i], |
---|
783 | | - "user", ee->user_bo[j]); |
---|
784 | | - |
---|
785 | | - if (ee->num_requests) { |
---|
786 | | - err_printf(m, "%s --- %d requests\n", |
---|
787 | | - dev_priv->engine[i]->name, |
---|
788 | | - ee->num_requests); |
---|
789 | | - for (j = 0; j < ee->num_requests; j++) |
---|
790 | | - error_print_request(m, " ", |
---|
791 | | - &ee->requests[j], |
---|
792 | | - error->epoch); |
---|
793 | | - } |
---|
794 | | - |
---|
795 | | - if (IS_ERR(ee->waiters)) { |
---|
796 | | - err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n", |
---|
797 | | - dev_priv->engine[i]->name); |
---|
798 | | - } else if (ee->num_waiters) { |
---|
799 | | - err_printf(m, "%s --- %d waiters\n", |
---|
800 | | - dev_priv->engine[i]->name, |
---|
801 | | - ee->num_waiters); |
---|
802 | | - for (j = 0; j < ee->num_waiters; j++) { |
---|
803 | | - err_printf(m, " seqno 0x%08x for %s [%d]\n", |
---|
804 | | - ee->waiters[j].seqno, |
---|
805 | | - ee->waiters[j].comm, |
---|
806 | | - ee->waiters[j].pid); |
---|
807 | | - } |
---|
808 | | - } |
---|
809 | | - |
---|
810 | | - print_error_obj(m, dev_priv->engine[i], |
---|
811 | | - "ringbuffer", ee->ringbuffer); |
---|
812 | | - |
---|
813 | | - print_error_obj(m, dev_priv->engine[i], |
---|
814 | | - "HW Status", ee->hws_page); |
---|
815 | | - |
---|
816 | | - print_error_obj(m, dev_priv->engine[i], |
---|
817 | | - "HW context", ee->ctx); |
---|
818 | | - |
---|
819 | | - print_error_obj(m, dev_priv->engine[i], |
---|
820 | | - "WA context", ee->wa_ctx); |
---|
821 | | - |
---|
822 | | - print_error_obj(m, dev_priv->engine[i], |
---|
823 | | - "WA batchbuffer", ee->wa_batchbuffer); |
---|
824 | | - |
---|
825 | | - print_error_obj(m, dev_priv->engine[i], |
---|
826 | | - "NULL context", ee->default_state); |
---|
827 | | - } |
---|
| 813 | + if (error->gt) |
---|
| 814 | + err_print_gt(m, error->gt); |
---|
828 | 815 | |
---|
829 | 816 | if (error->overlay) |
---|
830 | 817 | intel_overlay_print_error_state(m, error->overlay); |
---|
.. | .. |
---|
832 | 819 | if (error->display) |
---|
833 | 820 | intel_display_print_error_state(m, error->display); |
---|
834 | 821 | |
---|
835 | | - err_print_capabilities(m, &error->device_info, &error->driver_caps); |
---|
| 822 | + err_print_capabilities(m, error); |
---|
836 | 823 | err_print_params(m, &error->params); |
---|
837 | | - err_print_uc(m, &error->uc); |
---|
| 824 | +} |
---|
838 | 825 | |
---|
839 | | - if (m->bytes == 0 && m->err) |
---|
840 | | - return m->err; |
---|
| 826 | +static int err_print_to_sgl(struct i915_gpu_coredump *error) |
---|
| 827 | +{ |
---|
| 828 | + struct drm_i915_error_state_buf m; |
---|
| 829 | + |
---|
| 830 | + if (IS_ERR(error)) |
---|
| 831 | + return PTR_ERR(error); |
---|
| 832 | + |
---|
| 833 | + if (READ_ONCE(error->sgl)) |
---|
| 834 | + return 0; |
---|
| 835 | + |
---|
| 836 | + memset(&m, 0, sizeof(m)); |
---|
| 837 | + m.i915 = error->i915; |
---|
| 838 | + |
---|
| 839 | + __err_print_to_sgl(&m, error); |
---|
| 840 | + |
---|
| 841 | + if (m.buf) { |
---|
| 842 | + __sg_set_buf(m.cur++, m.buf, m.bytes, m.iter); |
---|
| 843 | + m.bytes = 0; |
---|
| 844 | + m.buf = NULL; |
---|
| 845 | + } |
---|
| 846 | + if (m.cur) { |
---|
| 847 | + GEM_BUG_ON(m.end < m.cur); |
---|
| 848 | + sg_mark_end(m.cur - 1); |
---|
| 849 | + } |
---|
| 850 | + GEM_BUG_ON(m.sgl && !m.cur); |
---|
| 851 | + |
---|
| 852 | + if (m.err) { |
---|
| 853 | + err_free_sgl(m.sgl); |
---|
| 854 | + return m.err; |
---|
| 855 | + } |
---|
| 856 | + |
---|
| 857 | + if (cmpxchg(&error->sgl, NULL, m.sgl)) |
---|
| 858 | + err_free_sgl(m.sgl); |
---|
841 | 859 | |
---|
842 | 860 | return 0; |
---|
843 | 861 | } |
---|
844 | 862 | |
---|
845 | | -int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf, |
---|
846 | | - struct drm_i915_private *i915, |
---|
847 | | - size_t count, loff_t pos) |
---|
| 863 | +ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, |
---|
| 864 | + char *buf, loff_t off, size_t rem) |
---|
848 | 865 | { |
---|
849 | | - memset(ebuf, 0, sizeof(*ebuf)); |
---|
850 | | - ebuf->i915 = i915; |
---|
| 866 | + struct scatterlist *sg; |
---|
| 867 | + size_t count; |
---|
| 868 | + loff_t pos; |
---|
| 869 | + int err; |
---|
851 | 870 | |
---|
852 | | - /* We need to have enough room to store any i915_error_state printf |
---|
853 | | - * so that we can move it to start position. |
---|
854 | | - */ |
---|
855 | | - ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE; |
---|
856 | | - ebuf->buf = kmalloc(ebuf->size, |
---|
857 | | - GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); |
---|
| 871 | + if (!error || !rem) |
---|
| 872 | + return 0; |
---|
858 | 873 | |
---|
859 | | - if (ebuf->buf == NULL) { |
---|
860 | | - ebuf->size = PAGE_SIZE; |
---|
861 | | - ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL); |
---|
| 874 | + err = err_print_to_sgl(error); |
---|
| 875 | + if (err) |
---|
| 876 | + return err; |
---|
| 877 | + |
---|
| 878 | + sg = READ_ONCE(error->fit); |
---|
| 879 | + if (!sg || off < sg->dma_address) |
---|
| 880 | + sg = error->sgl; |
---|
| 881 | + if (!sg) |
---|
| 882 | + return 0; |
---|
| 883 | + |
---|
| 884 | + pos = sg->dma_address; |
---|
| 885 | + count = 0; |
---|
| 886 | + do { |
---|
| 887 | + size_t len, start; |
---|
| 888 | + |
---|
| 889 | + if (sg_is_chain(sg)) { |
---|
| 890 | + sg = sg_chain_ptr(sg); |
---|
| 891 | + GEM_BUG_ON(sg_is_chain(sg)); |
---|
| 892 | + } |
---|
| 893 | + |
---|
| 894 | + len = sg->length; |
---|
| 895 | + if (pos + len <= off) { |
---|
| 896 | + pos += len; |
---|
| 897 | + continue; |
---|
| 898 | + } |
---|
| 899 | + |
---|
| 900 | + start = sg->offset; |
---|
| 901 | + if (pos < off) { |
---|
| 902 | + GEM_BUG_ON(off - pos > len); |
---|
| 903 | + len -= off - pos; |
---|
| 904 | + start += off - pos; |
---|
| 905 | + pos = off; |
---|
| 906 | + } |
---|
| 907 | + |
---|
| 908 | + len = min(len, rem); |
---|
| 909 | + GEM_BUG_ON(!len || len > sg->length); |
---|
| 910 | + |
---|
| 911 | + memcpy(buf, page_address(sg_page(sg)) + start, len); |
---|
| 912 | + |
---|
| 913 | + count += len; |
---|
| 914 | + pos += len; |
---|
| 915 | + |
---|
| 916 | + buf += len; |
---|
| 917 | + rem -= len; |
---|
| 918 | + if (!rem) { |
---|
| 919 | + WRITE_ONCE(error->fit, sg); |
---|
| 920 | + break; |
---|
| 921 | + } |
---|
| 922 | + } while (!sg_is_last(sg++)); |
---|
| 923 | + |
---|
| 924 | + return count; |
---|
| 925 | +} |
---|
| 926 | + |
---|
| 927 | +static void i915_vma_coredump_free(struct i915_vma_coredump *vma) |
---|
| 928 | +{ |
---|
| 929 | + while (vma) { |
---|
| 930 | + struct i915_vma_coredump *next = vma->next; |
---|
| 931 | + int page; |
---|
| 932 | + |
---|
| 933 | + for (page = 0; page < vma->page_count; page++) |
---|
| 934 | + free_page((unsigned long)vma->pages[page]); |
---|
| 935 | + |
---|
| 936 | + kfree(vma); |
---|
| 937 | + vma = next; |
---|
| 938 | + } |
---|
| 939 | +} |
---|
| 940 | + |
---|
| 941 | +static void cleanup_params(struct i915_gpu_coredump *error) |
---|
| 942 | +{ |
---|
| 943 | + i915_params_free(&error->params); |
---|
| 944 | +} |
---|
| 945 | + |
---|
| 946 | +static void cleanup_uc(struct intel_uc_coredump *uc) |
---|
| 947 | +{ |
---|
| 948 | + kfree(uc->guc_fw.path); |
---|
| 949 | + kfree(uc->huc_fw.path); |
---|
| 950 | + i915_vma_coredump_free(uc->guc_log); |
---|
| 951 | + |
---|
| 952 | + kfree(uc); |
---|
| 953 | +} |
---|
| 954 | + |
---|
| 955 | +static void cleanup_gt(struct intel_gt_coredump *gt) |
---|
| 956 | +{ |
---|
| 957 | + while (gt->engine) { |
---|
| 958 | + struct intel_engine_coredump *ee = gt->engine; |
---|
| 959 | + |
---|
| 960 | + gt->engine = ee->next; |
---|
| 961 | + |
---|
| 962 | + i915_vma_coredump_free(ee->vma); |
---|
| 963 | + kfree(ee); |
---|
862 | 964 | } |
---|
863 | 965 | |
---|
864 | | - if (ebuf->buf == NULL) { |
---|
865 | | - ebuf->size = 128; |
---|
866 | | - ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL); |
---|
867 | | - } |
---|
| 966 | + if (gt->uc) |
---|
| 967 | + cleanup_uc(gt->uc); |
---|
868 | 968 | |
---|
869 | | - if (ebuf->buf == NULL) |
---|
870 | | - return -ENOMEM; |
---|
871 | | - |
---|
872 | | - ebuf->start = pos; |
---|
873 | | - |
---|
874 | | - return 0; |
---|
| 969 | + kfree(gt); |
---|
875 | 970 | } |
---|
876 | 971 | |
---|
877 | | -static void i915_error_object_free(struct drm_i915_error_object *obj) |
---|
| 972 | +void __i915_gpu_coredump_free(struct kref *error_ref) |
---|
878 | 973 | { |
---|
879 | | - int page; |
---|
880 | | - |
---|
881 | | - if (obj == NULL) |
---|
882 | | - return; |
---|
883 | | - |
---|
884 | | - for (page = 0; page < obj->page_count; page++) |
---|
885 | | - free_page((unsigned long)obj->pages[page]); |
---|
886 | | - |
---|
887 | | - kfree(obj); |
---|
888 | | -} |
---|
889 | | - |
---|
890 | | -static __always_inline void free_param(const char *type, void *x) |
---|
891 | | -{ |
---|
892 | | - if (!__builtin_strcmp(type, "char *")) |
---|
893 | | - kfree(*(void **)x); |
---|
894 | | -} |
---|
895 | | - |
---|
896 | | -static void cleanup_params(struct i915_gpu_state *error) |
---|
897 | | -{ |
---|
898 | | -#define FREE(T, x, ...) free_param(#T, &error->params.x); |
---|
899 | | - I915_PARAMS_FOR_EACH(FREE); |
---|
900 | | -#undef FREE |
---|
901 | | -} |
---|
902 | | - |
---|
903 | | -static void cleanup_uc_state(struct i915_gpu_state *error) |
---|
904 | | -{ |
---|
905 | | - struct i915_error_uc *error_uc = &error->uc; |
---|
906 | | - |
---|
907 | | - kfree(error_uc->guc_fw.path); |
---|
908 | | - kfree(error_uc->huc_fw.path); |
---|
909 | | - i915_error_object_free(error_uc->guc_log); |
---|
910 | | -} |
---|
911 | | - |
---|
912 | | -void __i915_gpu_state_free(struct kref *error_ref) |
---|
913 | | -{ |
---|
914 | | - struct i915_gpu_state *error = |
---|
| 974 | + struct i915_gpu_coredump *error = |
---|
915 | 975 | container_of(error_ref, typeof(*error), ref); |
---|
916 | | - long i, j; |
---|
917 | 976 | |
---|
918 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
---|
919 | | - struct drm_i915_error_engine *ee = &error->engine[i]; |
---|
| 977 | + while (error->gt) { |
---|
| 978 | + struct intel_gt_coredump *gt = error->gt; |
---|
920 | 979 | |
---|
921 | | - for (j = 0; j < ee->user_bo_count; j++) |
---|
922 | | - i915_error_object_free(ee->user_bo[j]); |
---|
923 | | - kfree(ee->user_bo); |
---|
924 | | - |
---|
925 | | - i915_error_object_free(ee->batchbuffer); |
---|
926 | | - i915_error_object_free(ee->wa_batchbuffer); |
---|
927 | | - i915_error_object_free(ee->ringbuffer); |
---|
928 | | - i915_error_object_free(ee->hws_page); |
---|
929 | | - i915_error_object_free(ee->ctx); |
---|
930 | | - i915_error_object_free(ee->wa_ctx); |
---|
931 | | - |
---|
932 | | - kfree(ee->requests); |
---|
933 | | - if (!IS_ERR_OR_NULL(ee->waiters)) |
---|
934 | | - kfree(ee->waiters); |
---|
| 980 | + error->gt = gt->next; |
---|
| 981 | + cleanup_gt(gt); |
---|
935 | 982 | } |
---|
936 | | - |
---|
937 | | - for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) |
---|
938 | | - kfree(error->active_bo[i]); |
---|
939 | | - kfree(error->pinned_bo); |
---|
940 | 983 | |
---|
941 | 984 | kfree(error->overlay); |
---|
942 | 985 | kfree(error->display); |
---|
943 | 986 | |
---|
944 | 987 | cleanup_params(error); |
---|
945 | | - cleanup_uc_state(error); |
---|
946 | 988 | |
---|
| 989 | + err_free_sgl(error->sgl); |
---|
947 | 990 | kfree(error); |
---|
948 | 991 | } |
---|
949 | 992 | |
---|
950 | | -static struct drm_i915_error_object * |
---|
951 | | -i915_error_object_create(struct drm_i915_private *i915, |
---|
952 | | - struct i915_vma *vma) |
---|
| 993 | +static struct i915_vma_coredump * |
---|
| 994 | +i915_vma_coredump_create(const struct intel_gt *gt, |
---|
| 995 | + const struct i915_vma *vma, |
---|
| 996 | + const char *name, |
---|
| 997 | + struct i915_vma_compress *compress) |
---|
953 | 998 | { |
---|
954 | | - struct i915_ggtt *ggtt = &i915->ggtt; |
---|
| 999 | + struct i915_ggtt *ggtt = gt->ggtt; |
---|
955 | 1000 | const u64 slot = ggtt->error_capture.start; |
---|
956 | | - struct drm_i915_error_object *dst; |
---|
957 | | - struct compress compress; |
---|
| 1001 | + struct i915_vma_coredump *dst; |
---|
958 | 1002 | unsigned long num_pages; |
---|
959 | 1003 | struct sgt_iter iter; |
---|
960 | | - dma_addr_t dma; |
---|
961 | 1004 | int ret; |
---|
962 | 1005 | |
---|
963 | | - if (!vma) |
---|
| 1006 | + might_sleep(); |
---|
| 1007 | + |
---|
| 1008 | + if (!vma || !vma->pages || !compress) |
---|
964 | 1009 | return NULL; |
---|
965 | 1010 | |
---|
966 | 1011 | num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; |
---|
967 | 1012 | num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */ |
---|
968 | | - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), |
---|
969 | | - GFP_ATOMIC | __GFP_NOWARN); |
---|
| 1013 | + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL); |
---|
970 | 1014 | if (!dst) |
---|
971 | 1015 | return NULL; |
---|
972 | 1016 | |
---|
| 1017 | + if (!compress_start(compress)) { |
---|
| 1018 | + kfree(dst); |
---|
| 1019 | + return NULL; |
---|
| 1020 | + } |
---|
| 1021 | + |
---|
| 1022 | + strcpy(dst->name, name); |
---|
| 1023 | + dst->next = NULL; |
---|
| 1024 | + |
---|
973 | 1025 | dst->gtt_offset = vma->node.start; |
---|
974 | 1026 | dst->gtt_size = vma->node.size; |
---|
| 1027 | + dst->gtt_page_sizes = vma->page_sizes.gtt; |
---|
975 | 1028 | dst->num_pages = num_pages; |
---|
976 | 1029 | dst->page_count = 0; |
---|
977 | 1030 | dst->unused = 0; |
---|
978 | 1031 | |
---|
979 | | - if (!compress_init(&compress)) { |
---|
980 | | - kfree(dst); |
---|
981 | | - return NULL; |
---|
982 | | - } |
---|
983 | | - |
---|
984 | 1032 | ret = -EINVAL; |
---|
985 | | - for_each_sgt_dma(dma, iter, vma->pages) { |
---|
| 1033 | + if (drm_mm_node_allocated(&ggtt->error_capture)) { |
---|
986 | 1034 | void __iomem *s; |
---|
| 1035 | + dma_addr_t dma; |
---|
987 | 1036 | |
---|
988 | | - ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); |
---|
| 1037 | + for_each_sgt_daddr(dma, iter, vma->pages) { |
---|
| 1038 | + ggtt->vm.insert_page(&ggtt->vm, dma, slot, |
---|
| 1039 | + I915_CACHE_NONE, 0); |
---|
| 1040 | + mb(); |
---|
989 | 1041 | |
---|
990 | | - s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); |
---|
991 | | - ret = compress_page(&compress, (void __force *)s, dst); |
---|
992 | | - io_mapping_unmap_atomic(s); |
---|
993 | | - if (ret) |
---|
994 | | - break; |
---|
| 1042 | + s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); |
---|
| 1043 | + ret = compress_page(compress, |
---|
| 1044 | + (void __force *)s, dst, |
---|
| 1045 | + true); |
---|
| 1046 | + io_mapping_unmap(s); |
---|
| 1047 | + if (ret) |
---|
| 1048 | + break; |
---|
| 1049 | + } |
---|
| 1050 | + } else if (i915_gem_object_is_lmem(vma->obj)) { |
---|
| 1051 | + struct intel_memory_region *mem = vma->obj->mm.region; |
---|
| 1052 | + dma_addr_t dma; |
---|
| 1053 | + |
---|
| 1054 | + for_each_sgt_daddr(dma, iter, vma->pages) { |
---|
| 1055 | + void __iomem *s; |
---|
| 1056 | + |
---|
| 1057 | + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); |
---|
| 1058 | + ret = compress_page(compress, |
---|
| 1059 | + (void __force *)s, dst, |
---|
| 1060 | + true); |
---|
| 1061 | + io_mapping_unmap(s); |
---|
| 1062 | + if (ret) |
---|
| 1063 | + break; |
---|
| 1064 | + } |
---|
| 1065 | + } else { |
---|
| 1066 | + struct page *page; |
---|
| 1067 | + |
---|
| 1068 | + for_each_sgt_page(page, iter, vma->pages) { |
---|
| 1069 | + void *s; |
---|
| 1070 | + |
---|
| 1071 | + drm_clflush_pages(&page, 1); |
---|
| 1072 | + |
---|
| 1073 | + s = kmap(page); |
---|
| 1074 | + ret = compress_page(compress, s, dst, false); |
---|
| 1075 | + kunmap(page); |
---|
| 1076 | + |
---|
| 1077 | + drm_clflush_pages(&page, 1); |
---|
| 1078 | + |
---|
| 1079 | + if (ret) |
---|
| 1080 | + break; |
---|
| 1081 | + } |
---|
995 | 1082 | } |
---|
996 | 1083 | |
---|
997 | | - if (ret || compress_flush(&compress, dst)) { |
---|
| 1084 | + if (ret || compress_flush(compress, dst)) { |
---|
998 | 1085 | while (dst->page_count--) |
---|
999 | | - free_page((unsigned long)dst->pages[dst->page_count]); |
---|
| 1086 | + pool_free(&compress->pool, dst->pages[dst->page_count]); |
---|
1000 | 1087 | kfree(dst); |
---|
1001 | 1088 | dst = NULL; |
---|
1002 | 1089 | } |
---|
| 1090 | + compress_finish(compress); |
---|
1003 | 1091 | |
---|
1004 | | - compress_fini(&compress, dst); |
---|
1005 | | - ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); |
---|
1006 | 1092 | return dst; |
---|
1007 | 1093 | } |
---|
1008 | 1094 | |
---|
1009 | | -/* The error capture is special as tries to run underneath the normal |
---|
1010 | | - * locking rules - so we use the raw version of the i915_gem_active lookup. |
---|
1011 | | - */ |
---|
1012 | | -static inline uint32_t |
---|
1013 | | -__active_get_seqno(struct i915_gem_active *active) |
---|
| 1095 | +static void gt_record_fences(struct intel_gt_coredump *gt) |
---|
1014 | 1096 | { |
---|
1015 | | - struct i915_request *request; |
---|
1016 | | - |
---|
1017 | | - request = __i915_gem_active_peek(active); |
---|
1018 | | - return request ? request->global_seqno : 0; |
---|
1019 | | -} |
---|
1020 | | - |
---|
1021 | | -static inline int |
---|
1022 | | -__active_get_engine_id(struct i915_gem_active *active) |
---|
1023 | | -{ |
---|
1024 | | - struct i915_request *request; |
---|
1025 | | - |
---|
1026 | | - request = __i915_gem_active_peek(active); |
---|
1027 | | - return request ? request->engine->id : -1; |
---|
1028 | | -} |
---|
1029 | | - |
---|
1030 | | -static void capture_bo(struct drm_i915_error_buffer *err, |
---|
1031 | | - struct i915_vma *vma) |
---|
1032 | | -{ |
---|
1033 | | - struct drm_i915_gem_object *obj = vma->obj; |
---|
1034 | | - |
---|
1035 | | - err->size = obj->base.size; |
---|
1036 | | - err->name = obj->base.name; |
---|
1037 | | - |
---|
1038 | | - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); |
---|
1039 | | - err->engine = __active_get_engine_id(&obj->frontbuffer_write); |
---|
1040 | | - |
---|
1041 | | - err->gtt_offset = vma->node.start; |
---|
1042 | | - err->read_domains = obj->read_domains; |
---|
1043 | | - err->write_domain = obj->write_domain; |
---|
1044 | | - err->fence_reg = vma->fence ? vma->fence->id : -1; |
---|
1045 | | - err->tiling = i915_gem_object_get_tiling(obj); |
---|
1046 | | - err->dirty = obj->mm.dirty; |
---|
1047 | | - err->purgeable = obj->mm.madv != I915_MADV_WILLNEED; |
---|
1048 | | - err->userptr = obj->userptr.mm != NULL; |
---|
1049 | | - err->cache_level = obj->cache_level; |
---|
1050 | | -} |
---|
1051 | | - |
---|
1052 | | -static u32 capture_error_bo(struct drm_i915_error_buffer *err, |
---|
1053 | | - int count, struct list_head *head, |
---|
1054 | | - bool pinned_only) |
---|
1055 | | -{ |
---|
1056 | | - struct i915_vma *vma; |
---|
1057 | | - int i = 0; |
---|
1058 | | - |
---|
1059 | | - list_for_each_entry(vma, head, vm_link) { |
---|
1060 | | - if (!vma->obj) |
---|
1061 | | - continue; |
---|
1062 | | - |
---|
1063 | | - if (pinned_only && !i915_vma_is_pinned(vma)) |
---|
1064 | | - continue; |
---|
1065 | | - |
---|
1066 | | - capture_bo(err++, vma); |
---|
1067 | | - if (++i == count) |
---|
1068 | | - break; |
---|
1069 | | - } |
---|
1070 | | - |
---|
1071 | | - return i; |
---|
1072 | | -} |
---|
1073 | | - |
---|
1074 | | -/* Generate a semi-unique error code. The code is not meant to have meaning, The |
---|
1075 | | - * code's only purpose is to try to prevent false duplicated bug reports by |
---|
1076 | | - * grossly estimating a GPU error state. |
---|
1077 | | - * |
---|
1078 | | - * TODO Ideally, hashing the batchbuffer would be a very nice way to determine |
---|
1079 | | - * the hang if we could strip the GTT offset information from it. |
---|
1080 | | - * |
---|
1081 | | - * It's only a small step better than a random number in its current form. |
---|
1082 | | - */ |
---|
1083 | | -static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, |
---|
1084 | | - struct i915_gpu_state *error, |
---|
1085 | | - int *engine_id) |
---|
1086 | | -{ |
---|
1087 | | - uint32_t error_code = 0; |
---|
| 1097 | + struct i915_ggtt *ggtt = gt->_gt->ggtt; |
---|
| 1098 | + struct intel_uncore *uncore = gt->_gt->uncore; |
---|
1088 | 1099 | int i; |
---|
1089 | 1100 | |
---|
1090 | | - /* IPEHR would be an ideal way to detect errors, as it's the gross |
---|
1091 | | - * measure of "the command that hung." However, has some very common |
---|
1092 | | - * synchronization commands which almost always appear in the case |
---|
1093 | | - * strictly a client bug. Use instdone to differentiate those some. |
---|
1094 | | - */ |
---|
1095 | | - for (i = 0; i < I915_NUM_ENGINES; i++) { |
---|
1096 | | - if (error->engine[i].hangcheck_stalled) { |
---|
1097 | | - if (engine_id) |
---|
1098 | | - *engine_id = i; |
---|
1099 | | - |
---|
1100 | | - return error->engine[i].ipehr ^ |
---|
1101 | | - error->engine[i].instdone.instdone; |
---|
1102 | | - } |
---|
1103 | | - } |
---|
1104 | | - |
---|
1105 | | - return error_code; |
---|
1106 | | -} |
---|
1107 | | - |
---|
1108 | | -static void gem_record_fences(struct i915_gpu_state *error) |
---|
1109 | | -{ |
---|
1110 | | - struct drm_i915_private *dev_priv = error->i915; |
---|
1111 | | - int i; |
---|
1112 | | - |
---|
1113 | | - if (INTEL_GEN(dev_priv) >= 6) { |
---|
1114 | | - for (i = 0; i < dev_priv->num_fence_regs; i++) |
---|
1115 | | - error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); |
---|
1116 | | - } else if (INTEL_GEN(dev_priv) >= 4) { |
---|
1117 | | - for (i = 0; i < dev_priv->num_fence_regs; i++) |
---|
1118 | | - error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); |
---|
| 1101 | + if (INTEL_GEN(uncore->i915) >= 6) { |
---|
| 1102 | + for (i = 0; i < ggtt->num_fences; i++) |
---|
| 1103 | + gt->fence[i] = |
---|
| 1104 | + intel_uncore_read64(uncore, |
---|
| 1105 | + FENCE_REG_GEN6_LO(i)); |
---|
| 1106 | + } else if (INTEL_GEN(uncore->i915) >= 4) { |
---|
| 1107 | + for (i = 0; i < ggtt->num_fences; i++) |
---|
| 1108 | + gt->fence[i] = |
---|
| 1109 | + intel_uncore_read64(uncore, |
---|
| 1110 | + FENCE_REG_965_LO(i)); |
---|
1119 | 1111 | } else { |
---|
1120 | | - for (i = 0; i < dev_priv->num_fence_regs; i++) |
---|
1121 | | - error->fence[i] = I915_READ(FENCE_REG(i)); |
---|
| 1112 | + for (i = 0; i < ggtt->num_fences; i++) |
---|
| 1113 | + gt->fence[i] = |
---|
| 1114 | + intel_uncore_read(uncore, FENCE_REG(i)); |
---|
1122 | 1115 | } |
---|
1123 | | - error->nfence = i; |
---|
| 1116 | + gt->nfence = i; |
---|
1124 | 1117 | } |
---|
1125 | 1118 | |
---|
1126 | | -static void gen6_record_semaphore_state(struct intel_engine_cs *engine, |
---|
1127 | | - struct drm_i915_error_engine *ee) |
---|
| 1119 | +static void engine_record_registers(struct intel_engine_coredump *ee) |
---|
1128 | 1120 | { |
---|
1129 | | - struct drm_i915_private *dev_priv = engine->i915; |
---|
| 1121 | + const struct intel_engine_cs *engine = ee->engine; |
---|
| 1122 | + struct drm_i915_private *i915 = engine->i915; |
---|
1130 | 1123 | |
---|
1131 | | - ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); |
---|
1132 | | - ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); |
---|
1133 | | - if (HAS_VEBOX(dev_priv)) |
---|
1134 | | - ee->semaphore_mboxes[2] = |
---|
1135 | | - I915_READ(RING_SYNC_2(engine->mmio_base)); |
---|
1136 | | -} |
---|
| 1124 | + if (INTEL_GEN(i915) >= 6) { |
---|
| 1125 | + ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL); |
---|
1137 | 1126 | |
---|
1138 | | -static void error_record_engine_waiters(struct intel_engine_cs *engine, |
---|
1139 | | - struct drm_i915_error_engine *ee) |
---|
1140 | | -{ |
---|
1141 | | - struct intel_breadcrumbs *b = &engine->breadcrumbs; |
---|
1142 | | - struct drm_i915_error_waiter *waiter; |
---|
1143 | | - struct rb_node *rb; |
---|
1144 | | - int count; |
---|
1145 | | - |
---|
1146 | | - ee->num_waiters = 0; |
---|
1147 | | - ee->waiters = NULL; |
---|
1148 | | - |
---|
1149 | | - if (RB_EMPTY_ROOT(&b->waiters)) |
---|
1150 | | - return; |
---|
1151 | | - |
---|
1152 | | - if (!spin_trylock_irq(&b->rb_lock)) { |
---|
1153 | | - ee->waiters = ERR_PTR(-EDEADLK); |
---|
1154 | | - return; |
---|
| 1127 | + if (INTEL_GEN(i915) >= 12) |
---|
| 1128 | + ee->fault_reg = intel_uncore_read(engine->uncore, |
---|
| 1129 | + GEN12_RING_FAULT_REG); |
---|
| 1130 | + else if (INTEL_GEN(i915) >= 8) |
---|
| 1131 | + ee->fault_reg = intel_uncore_read(engine->uncore, |
---|
| 1132 | + GEN8_RING_FAULT_REG); |
---|
| 1133 | + else |
---|
| 1134 | + ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine); |
---|
1155 | 1135 | } |
---|
1156 | 1136 | |
---|
1157 | | - count = 0; |
---|
1158 | | - for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) |
---|
1159 | | - count++; |
---|
1160 | | - spin_unlock_irq(&b->rb_lock); |
---|
1161 | | - |
---|
1162 | | - waiter = NULL; |
---|
1163 | | - if (count) |
---|
1164 | | - waiter = kmalloc_array(count, |
---|
1165 | | - sizeof(struct drm_i915_error_waiter), |
---|
1166 | | - GFP_ATOMIC); |
---|
1167 | | - if (!waiter) |
---|
1168 | | - return; |
---|
1169 | | - |
---|
1170 | | - if (!spin_trylock_irq(&b->rb_lock)) { |
---|
1171 | | - kfree(waiter); |
---|
1172 | | - ee->waiters = ERR_PTR(-EDEADLK); |
---|
1173 | | - return; |
---|
1174 | | - } |
---|
1175 | | - |
---|
1176 | | - ee->waiters = waiter; |
---|
1177 | | - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { |
---|
1178 | | - struct intel_wait *w = rb_entry(rb, typeof(*w), node); |
---|
1179 | | - |
---|
1180 | | - strcpy(waiter->comm, w->tsk->comm); |
---|
1181 | | - waiter->pid = w->tsk->pid; |
---|
1182 | | - waiter->seqno = w->seqno; |
---|
1183 | | - waiter++; |
---|
1184 | | - |
---|
1185 | | - if (++ee->num_waiters == count) |
---|
1186 | | - break; |
---|
1187 | | - } |
---|
1188 | | - spin_unlock_irq(&b->rb_lock); |
---|
1189 | | -} |
---|
1190 | | - |
---|
1191 | | -static void error_record_engine_registers(struct i915_gpu_state *error, |
---|
1192 | | - struct intel_engine_cs *engine, |
---|
1193 | | - struct drm_i915_error_engine *ee) |
---|
1194 | | -{ |
---|
1195 | | - struct drm_i915_private *dev_priv = engine->i915; |
---|
1196 | | - |
---|
1197 | | - if (INTEL_GEN(dev_priv) >= 6) { |
---|
1198 | | - ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); |
---|
1199 | | - if (INTEL_GEN(dev_priv) >= 8) { |
---|
1200 | | - ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); |
---|
1201 | | - } else { |
---|
1202 | | - gen6_record_semaphore_state(engine, ee); |
---|
1203 | | - ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); |
---|
| 1137 | + if (INTEL_GEN(i915) >= 4) { |
---|
| 1138 | + ee->esr = ENGINE_READ(engine, RING_ESR); |
---|
| 1139 | + ee->faddr = ENGINE_READ(engine, RING_DMA_FADD); |
---|
| 1140 | + ee->ipeir = ENGINE_READ(engine, RING_IPEIR); |
---|
| 1141 | + ee->ipehr = ENGINE_READ(engine, RING_IPEHR); |
---|
| 1142 | + ee->instps = ENGINE_READ(engine, RING_INSTPS); |
---|
| 1143 | + ee->bbaddr = ENGINE_READ(engine, RING_BBADDR); |
---|
| 1144 | + ee->ccid = ENGINE_READ(engine, CCID); |
---|
| 1145 | + if (INTEL_GEN(i915) >= 8) { |
---|
| 1146 | + ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32; |
---|
| 1147 | + ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32; |
---|
1204 | 1148 | } |
---|
1205 | | - } |
---|
1206 | | - |
---|
1207 | | - if (INTEL_GEN(dev_priv) >= 4) { |
---|
1208 | | - ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); |
---|
1209 | | - ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); |
---|
1210 | | - ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); |
---|
1211 | | - ee->instps = I915_READ(RING_INSTPS(engine->mmio_base)); |
---|
1212 | | - ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); |
---|
1213 | | - if (INTEL_GEN(dev_priv) >= 8) { |
---|
1214 | | - ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; |
---|
1215 | | - ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; |
---|
1216 | | - } |
---|
1217 | | - ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); |
---|
| 1149 | + ee->bbstate = ENGINE_READ(engine, RING_BBSTATE); |
---|
1218 | 1150 | } else { |
---|
1219 | | - ee->faddr = I915_READ(DMA_FADD_I8XX); |
---|
1220 | | - ee->ipeir = I915_READ(IPEIR); |
---|
1221 | | - ee->ipehr = I915_READ(IPEHR); |
---|
| 1151 | + ee->faddr = ENGINE_READ(engine, DMA_FADD_I8XX); |
---|
| 1152 | + ee->ipeir = ENGINE_READ(engine, IPEIR); |
---|
| 1153 | + ee->ipehr = ENGINE_READ(engine, IPEHR); |
---|
1222 | 1154 | } |
---|
1223 | 1155 | |
---|
1224 | 1156 | intel_engine_get_instdone(engine, &ee->instdone); |
---|
1225 | 1157 | |
---|
1226 | | - ee->waiting = intel_engine_has_waiter(engine); |
---|
1227 | | - ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); |
---|
| 1158 | + ee->instpm = ENGINE_READ(engine, RING_INSTPM); |
---|
1228 | 1159 | ee->acthd = intel_engine_get_active_head(engine); |
---|
1229 | | - ee->seqno = intel_engine_get_seqno(engine); |
---|
1230 | | - ee->last_seqno = intel_engine_last_submit(engine); |
---|
1231 | | - ee->start = I915_READ_START(engine); |
---|
1232 | | - ee->head = I915_READ_HEAD(engine); |
---|
1233 | | - ee->tail = I915_READ_TAIL(engine); |
---|
1234 | | - ee->ctl = I915_READ_CTL(engine); |
---|
1235 | | - if (INTEL_GEN(dev_priv) > 2) |
---|
1236 | | - ee->mode = I915_READ_MODE(engine); |
---|
| 1160 | + ee->start = ENGINE_READ(engine, RING_START); |
---|
| 1161 | + ee->head = ENGINE_READ(engine, RING_HEAD); |
---|
| 1162 | + ee->tail = ENGINE_READ(engine, RING_TAIL); |
---|
| 1163 | + ee->ctl = ENGINE_READ(engine, RING_CTL); |
---|
| 1164 | + if (INTEL_GEN(i915) > 2) |
---|
| 1165 | + ee->mode = ENGINE_READ(engine, RING_MI_MODE); |
---|
1237 | 1166 | |
---|
1238 | | - if (!HWS_NEEDS_PHYSICAL(dev_priv)) { |
---|
| 1167 | + if (!HWS_NEEDS_PHYSICAL(i915)) { |
---|
1239 | 1168 | i915_reg_t mmio; |
---|
1240 | 1169 | |
---|
1241 | | - if (IS_GEN7(dev_priv)) { |
---|
| 1170 | + if (IS_GEN(i915, 7)) { |
---|
1242 | 1171 | switch (engine->id) { |
---|
1243 | 1172 | default: |
---|
1244 | | - case RCS: |
---|
| 1173 | + MISSING_CASE(engine->id); |
---|
| 1174 | + fallthrough; |
---|
| 1175 | + case RCS0: |
---|
1245 | 1176 | mmio = RENDER_HWS_PGA_GEN7; |
---|
1246 | 1177 | break; |
---|
1247 | | - case BCS: |
---|
| 1178 | + case BCS0: |
---|
1248 | 1179 | mmio = BLT_HWS_PGA_GEN7; |
---|
1249 | 1180 | break; |
---|
1250 | | - case VCS: |
---|
| 1181 | + case VCS0: |
---|
1251 | 1182 | mmio = BSD_HWS_PGA_GEN7; |
---|
1252 | 1183 | break; |
---|
1253 | | - case VECS: |
---|
| 1184 | + case VECS0: |
---|
1254 | 1185 | mmio = VEBOX_HWS_PGA_GEN7; |
---|
1255 | 1186 | break; |
---|
1256 | 1187 | } |
---|
1257 | | - } else if (IS_GEN6(engine->i915)) { |
---|
| 1188 | + } else if (IS_GEN(engine->i915, 6)) { |
---|
1258 | 1189 | mmio = RING_HWS_PGA_GEN6(engine->mmio_base); |
---|
1259 | 1190 | } else { |
---|
1260 | 1191 | /* XXX: gen8 returns to sanity */ |
---|
1261 | 1192 | mmio = RING_HWS_PGA(engine->mmio_base); |
---|
1262 | 1193 | } |
---|
1263 | 1194 | |
---|
1264 | | - ee->hws = I915_READ(mmio); |
---|
| 1195 | + ee->hws = intel_uncore_read(engine->uncore, mmio); |
---|
1265 | 1196 | } |
---|
1266 | 1197 | |
---|
1267 | | - ee->idle = intel_engine_is_idle(engine); |
---|
1268 | | - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; |
---|
1269 | | - ee->hangcheck_action = engine->hangcheck.action; |
---|
1270 | | - ee->hangcheck_stalled = engine->hangcheck.stalled; |
---|
1271 | | - ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, |
---|
1272 | | - engine); |
---|
| 1198 | + ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine); |
---|
1273 | 1199 | |
---|
1274 | | - if (USES_PPGTT(dev_priv)) { |
---|
| 1200 | + if (HAS_PPGTT(i915)) { |
---|
1275 | 1201 | int i; |
---|
1276 | 1202 | |
---|
1277 | | - ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); |
---|
| 1203 | + ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7); |
---|
1278 | 1204 | |
---|
1279 | | - if (IS_GEN6(dev_priv)) |
---|
| 1205 | + if (IS_GEN(i915, 6)) { |
---|
1280 | 1206 | ee->vm_info.pp_dir_base = |
---|
1281 | | - I915_READ(RING_PP_DIR_BASE_READ(engine)); |
---|
1282 | | - else if (IS_GEN7(dev_priv)) |
---|
| 1207 | + ENGINE_READ(engine, RING_PP_DIR_BASE_READ); |
---|
| 1208 | + } else if (IS_GEN(i915, 7)) { |
---|
1283 | 1209 | ee->vm_info.pp_dir_base = |
---|
1284 | | - I915_READ(RING_PP_DIR_BASE(engine)); |
---|
1285 | | - else if (INTEL_GEN(dev_priv) >= 8) |
---|
| 1210 | + ENGINE_READ(engine, RING_PP_DIR_BASE); |
---|
| 1211 | + } else if (INTEL_GEN(i915) >= 8) { |
---|
| 1212 | + u32 base = engine->mmio_base; |
---|
| 1213 | + |
---|
1286 | 1214 | for (i = 0; i < 4; i++) { |
---|
1287 | 1215 | ee->vm_info.pdp[i] = |
---|
1288 | | - I915_READ(GEN8_RING_PDP_UDW(engine, i)); |
---|
| 1216 | + intel_uncore_read(engine->uncore, |
---|
| 1217 | + GEN8_RING_PDP_UDW(base, i)); |
---|
1289 | 1218 | ee->vm_info.pdp[i] <<= 32; |
---|
1290 | 1219 | ee->vm_info.pdp[i] |= |
---|
1291 | | - I915_READ(GEN8_RING_PDP_LDW(engine, i)); |
---|
| 1220 | + intel_uncore_read(engine->uncore, |
---|
| 1221 | + GEN8_RING_PDP_LDW(base, i)); |
---|
1292 | 1222 | } |
---|
| 1223 | + } |
---|
1293 | 1224 | } |
---|
1294 | 1225 | } |
---|
1295 | 1226 | |
---|
1296 | | -static void record_request(struct i915_request *request, |
---|
1297 | | - struct drm_i915_error_request *erq) |
---|
| 1227 | +static void record_request(const struct i915_request *request, |
---|
| 1228 | + struct i915_request_coredump *erq) |
---|
1298 | 1229 | { |
---|
1299 | | - struct i915_gem_context *ctx = request->gem_context; |
---|
1300 | | - |
---|
1301 | | - erq->context = ctx->hw_id; |
---|
| 1230 | + erq->flags = request->fence.flags; |
---|
| 1231 | + erq->context = request->fence.context; |
---|
| 1232 | + erq->seqno = request->fence.seqno; |
---|
1302 | 1233 | erq->sched_attr = request->sched.attr; |
---|
1303 | | - erq->ban_score = atomic_read(&ctx->ban_score); |
---|
1304 | | - erq->seqno = request->global_seqno; |
---|
1305 | | - erq->jiffies = request->emitted_jiffies; |
---|
1306 | | - erq->start = i915_ggtt_offset(request->ring->vma); |
---|
1307 | 1234 | erq->head = request->head; |
---|
1308 | 1235 | erq->tail = request->tail; |
---|
1309 | 1236 | |
---|
| 1237 | + erq->pid = 0; |
---|
1310 | 1238 | rcu_read_lock(); |
---|
1311 | | - erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0; |
---|
| 1239 | + if (!intel_context_is_closed(request->context)) { |
---|
| 1240 | + const struct i915_gem_context *ctx; |
---|
| 1241 | + |
---|
| 1242 | + ctx = rcu_dereference(request->context->gem_context); |
---|
| 1243 | + if (ctx) |
---|
| 1244 | + erq->pid = pid_nr(ctx->pid); |
---|
| 1245 | + } |
---|
1312 | 1246 | rcu_read_unlock(); |
---|
1313 | 1247 | } |
---|
1314 | 1248 | |
---|
1315 | | -static void engine_record_requests(struct intel_engine_cs *engine, |
---|
1316 | | - struct i915_request *first, |
---|
1317 | | - struct drm_i915_error_engine *ee) |
---|
| 1249 | +static void engine_record_execlists(struct intel_engine_coredump *ee) |
---|
1318 | 1250 | { |
---|
1319 | | - struct i915_request *request; |
---|
1320 | | - int count; |
---|
| 1251 | + const struct intel_engine_execlists * const el = &ee->engine->execlists; |
---|
| 1252 | + struct i915_request * const *port = el->active; |
---|
| 1253 | + unsigned int n = 0; |
---|
1321 | 1254 | |
---|
1322 | | - count = 0; |
---|
1323 | | - request = first; |
---|
1324 | | - list_for_each_entry_from(request, &engine->timeline.requests, link) |
---|
1325 | | - count++; |
---|
1326 | | - if (!count) |
---|
1327 | | - return; |
---|
1328 | | - |
---|
1329 | | - ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); |
---|
1330 | | - if (!ee->requests) |
---|
1331 | | - return; |
---|
1332 | | - |
---|
1333 | | - ee->num_requests = count; |
---|
1334 | | - |
---|
1335 | | - count = 0; |
---|
1336 | | - request = first; |
---|
1337 | | - list_for_each_entry_from(request, &engine->timeline.requests, link) { |
---|
1338 | | - if (count >= ee->num_requests) { |
---|
1339 | | - /* |
---|
1340 | | - * If the ring request list was changed in |
---|
1341 | | - * between the point where the error request |
---|
1342 | | - * list was created and dimensioned and this |
---|
1343 | | - * point then just exit early to avoid crashes. |
---|
1344 | | - * |
---|
1345 | | - * We don't need to communicate that the |
---|
1346 | | - * request list changed state during error |
---|
1347 | | - * state capture and that the error state is |
---|
1348 | | - * slightly incorrect as a consequence since we |
---|
1349 | | - * are typically only interested in the request |
---|
1350 | | - * list state at the point of error state |
---|
1351 | | - * capture, not in any changes happening during |
---|
1352 | | - * the capture. |
---|
1353 | | - */ |
---|
1354 | | - break; |
---|
1355 | | - } |
---|
1356 | | - |
---|
1357 | | - record_request(request, &ee->requests[count++]); |
---|
1358 | | - } |
---|
1359 | | - ee->num_requests = count; |
---|
1360 | | -} |
---|
1361 | | - |
---|
1362 | | -static void error_record_engine_execlists(struct intel_engine_cs *engine, |
---|
1363 | | - struct drm_i915_error_engine *ee) |
---|
1364 | | -{ |
---|
1365 | | - const struct intel_engine_execlists * const execlists = &engine->execlists; |
---|
1366 | | - unsigned int n; |
---|
1367 | | - |
---|
1368 | | - for (n = 0; n < execlists_num_ports(execlists); n++) { |
---|
1369 | | - struct i915_request *rq = port_request(&execlists->port[n]); |
---|
1370 | | - |
---|
1371 | | - if (!rq) |
---|
1372 | | - break; |
---|
1373 | | - |
---|
1374 | | - record_request(rq, &ee->execlist[n]); |
---|
1375 | | - } |
---|
| 1255 | + while (*port) |
---|
| 1256 | + record_request(*port++, &ee->execlist[n++]); |
---|
1376 | 1257 | |
---|
1377 | 1258 | ee->num_ports = n; |
---|
1378 | 1259 | } |
---|
1379 | 1260 | |
---|
1380 | | -static void record_context(struct drm_i915_error_context *e, |
---|
1381 | | - struct i915_gem_context *ctx) |
---|
| 1261 | +static bool record_context(struct i915_gem_context_coredump *e, |
---|
| 1262 | + const struct i915_request *rq) |
---|
1382 | 1263 | { |
---|
1383 | | - if (ctx->pid) { |
---|
1384 | | - struct task_struct *task; |
---|
| 1264 | + struct i915_gem_context *ctx; |
---|
| 1265 | + struct task_struct *task; |
---|
| 1266 | + bool simulated; |
---|
1385 | 1267 | |
---|
1386 | | - rcu_read_lock(); |
---|
1387 | | - task = pid_task(ctx->pid, PIDTYPE_PID); |
---|
1388 | | - if (task) { |
---|
1389 | | - strcpy(e->comm, task->comm); |
---|
1390 | | - e->pid = task->pid; |
---|
1391 | | - } |
---|
1392 | | - rcu_read_unlock(); |
---|
| 1268 | + rcu_read_lock(); |
---|
| 1269 | + ctx = rcu_dereference(rq->context->gem_context); |
---|
| 1270 | + if (ctx && !kref_get_unless_zero(&ctx->ref)) |
---|
| 1271 | + ctx = NULL; |
---|
| 1272 | + rcu_read_unlock(); |
---|
| 1273 | + if (!ctx) |
---|
| 1274 | + return true; |
---|
| 1275 | + |
---|
| 1276 | + rcu_read_lock(); |
---|
| 1277 | + task = pid_task(ctx->pid, PIDTYPE_PID); |
---|
| 1278 | + if (task) { |
---|
| 1279 | + strcpy(e->comm, task->comm); |
---|
| 1280 | + e->pid = task->pid; |
---|
1393 | 1281 | } |
---|
| 1282 | + rcu_read_unlock(); |
---|
1394 | 1283 | |
---|
1395 | | - e->handle = ctx->user_handle; |
---|
1396 | | - e->hw_id = ctx->hw_id; |
---|
1397 | 1284 | e->sched_attr = ctx->sched; |
---|
1398 | | - e->ban_score = atomic_read(&ctx->ban_score); |
---|
1399 | | - e->bannable = i915_gem_context_is_bannable(ctx); |
---|
1400 | 1285 | e->guilty = atomic_read(&ctx->guilty_count); |
---|
1401 | 1286 | e->active = atomic_read(&ctx->active_count); |
---|
| 1287 | + |
---|
| 1288 | + e->total_runtime = rq->context->runtime.total; |
---|
| 1289 | + e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg); |
---|
| 1290 | + |
---|
| 1291 | + simulated = i915_gem_context_no_error_capture(ctx); |
---|
| 1292 | + |
---|
| 1293 | + i915_gem_context_put(ctx); |
---|
| 1294 | + return simulated; |
---|
1402 | 1295 | } |
---|
1403 | 1296 | |
---|
1404 | | -static void request_record_user_bo(struct i915_request *request, |
---|
1405 | | - struct drm_i915_error_engine *ee) |
---|
| 1297 | +struct intel_engine_capture_vma { |
---|
| 1298 | + struct intel_engine_capture_vma *next; |
---|
| 1299 | + struct i915_vma *vma; |
---|
| 1300 | + char name[16]; |
---|
| 1301 | +}; |
---|
| 1302 | + |
---|
| 1303 | +static struct intel_engine_capture_vma * |
---|
| 1304 | +capture_vma(struct intel_engine_capture_vma *next, |
---|
| 1305 | + struct i915_vma *vma, |
---|
| 1306 | + const char *name, |
---|
| 1307 | + gfp_t gfp) |
---|
| 1308 | +{ |
---|
| 1309 | + struct intel_engine_capture_vma *c; |
---|
| 1310 | + |
---|
| 1311 | + if (!vma) |
---|
| 1312 | + return next; |
---|
| 1313 | + |
---|
| 1314 | + c = kmalloc(sizeof(*c), gfp); |
---|
| 1315 | + if (!c) |
---|
| 1316 | + return next; |
---|
| 1317 | + |
---|
| 1318 | + if (!i915_active_acquire_if_busy(&vma->active)) { |
---|
| 1319 | + kfree(c); |
---|
| 1320 | + return next; |
---|
| 1321 | + } |
---|
| 1322 | + |
---|
| 1323 | + strcpy(c->name, name); |
---|
| 1324 | + c->vma = vma; /* reference held while active */ |
---|
| 1325 | + |
---|
| 1326 | + c->next = next; |
---|
| 1327 | + return c; |
---|
| 1328 | +} |
---|
| 1329 | + |
---|
| 1330 | +static struct intel_engine_capture_vma * |
---|
| 1331 | +capture_user(struct intel_engine_capture_vma *capture, |
---|
| 1332 | + const struct i915_request *rq, |
---|
| 1333 | + gfp_t gfp) |
---|
1406 | 1334 | { |
---|
1407 | 1335 | struct i915_capture_list *c; |
---|
1408 | | - struct drm_i915_error_object **bo; |
---|
1409 | | - long count; |
---|
1410 | 1336 | |
---|
1411 | | - count = 0; |
---|
1412 | | - for (c = request->capture_list; c; c = c->next) |
---|
1413 | | - count++; |
---|
| 1337 | + for (c = rq->capture_list; c; c = c->next) |
---|
| 1338 | + capture = capture_vma(capture, c->vma, "user", gfp); |
---|
1414 | 1339 | |
---|
1415 | | - bo = NULL; |
---|
1416 | | - if (count) |
---|
1417 | | - bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); |
---|
1418 | | - if (!bo) |
---|
1419 | | - return; |
---|
1420 | | - |
---|
1421 | | - count = 0; |
---|
1422 | | - for (c = request->capture_list; c; c = c->next) { |
---|
1423 | | - bo[count] = i915_error_object_create(request->i915, c->vma); |
---|
1424 | | - if (!bo[count]) |
---|
1425 | | - break; |
---|
1426 | | - count++; |
---|
1427 | | - } |
---|
1428 | | - |
---|
1429 | | - ee->user_bo = bo; |
---|
1430 | | - ee->user_bo_count = count; |
---|
| 1340 | + return capture; |
---|
1431 | 1341 | } |
---|
1432 | 1342 | |
---|
1433 | | -static struct drm_i915_error_object * |
---|
1434 | | -capture_object(struct drm_i915_private *dev_priv, |
---|
1435 | | - struct drm_i915_gem_object *obj) |
---|
| 1343 | +static void add_vma(struct intel_engine_coredump *ee, |
---|
| 1344 | + struct i915_vma_coredump *vma) |
---|
1436 | 1345 | { |
---|
1437 | | - if (obj && i915_gem_object_has_pages(obj)) { |
---|
1438 | | - struct i915_vma fake = { |
---|
1439 | | - .node = { .start = U64_MAX, .size = obj->base.size }, |
---|
1440 | | - .size = obj->base.size, |
---|
1441 | | - .pages = obj->mm.pages, |
---|
1442 | | - .obj = obj, |
---|
1443 | | - }; |
---|
| 1346 | + if (vma) { |
---|
| 1347 | + vma->next = ee->vma; |
---|
| 1348 | + ee->vma = vma; |
---|
| 1349 | + } |
---|
| 1350 | +} |
---|
1444 | 1351 | |
---|
1445 | | - return i915_error_object_create(dev_priv, &fake); |
---|
1446 | | - } else { |
---|
| 1352 | +struct intel_engine_coredump * |
---|
| 1353 | +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) |
---|
| 1354 | +{ |
---|
| 1355 | + struct intel_engine_coredump *ee; |
---|
| 1356 | + |
---|
| 1357 | + ee = kzalloc(sizeof(*ee), gfp); |
---|
| 1358 | + if (!ee) |
---|
| 1359 | + return NULL; |
---|
| 1360 | + |
---|
| 1361 | + ee->engine = engine; |
---|
| 1362 | + |
---|
| 1363 | + engine_record_registers(ee); |
---|
| 1364 | + engine_record_execlists(ee); |
---|
| 1365 | + |
---|
| 1366 | + return ee; |
---|
| 1367 | +} |
---|
| 1368 | + |
---|
| 1369 | +struct intel_engine_capture_vma * |
---|
| 1370 | +intel_engine_coredump_add_request(struct intel_engine_coredump *ee, |
---|
| 1371 | + struct i915_request *rq, |
---|
| 1372 | + gfp_t gfp) |
---|
| 1373 | +{ |
---|
| 1374 | + struct intel_engine_capture_vma *vma = NULL; |
---|
| 1375 | + |
---|
| 1376 | + ee->simulated |= record_context(&ee->context, rq); |
---|
| 1377 | + if (ee->simulated) |
---|
| 1378 | + return NULL; |
---|
| 1379 | + |
---|
| 1380 | + /* |
---|
| 1381 | + * We need to copy these to an anonymous buffer |
---|
| 1382 | + * as the simplest method to avoid being overwritten |
---|
| 1383 | + * by userspace. |
---|
| 1384 | + */ |
---|
| 1385 | + vma = capture_vma(vma, rq->batch, "batch", gfp); |
---|
| 1386 | + vma = capture_user(vma, rq, gfp); |
---|
| 1387 | + vma = capture_vma(vma, rq->ring->vma, "ring", gfp); |
---|
| 1388 | + vma = capture_vma(vma, rq->context->state, "HW context", gfp); |
---|
| 1389 | + |
---|
| 1390 | + ee->rq_head = rq->head; |
---|
| 1391 | + ee->rq_post = rq->postfix; |
---|
| 1392 | + ee->rq_tail = rq->tail; |
---|
| 1393 | + |
---|
| 1394 | + return vma; |
---|
| 1395 | +} |
---|
| 1396 | + |
---|
| 1397 | +void |
---|
| 1398 | +intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, |
---|
| 1399 | + struct intel_engine_capture_vma *capture, |
---|
| 1400 | + struct i915_vma_compress *compress) |
---|
| 1401 | +{ |
---|
| 1402 | + const struct intel_engine_cs *engine = ee->engine; |
---|
| 1403 | + |
---|
| 1404 | + while (capture) { |
---|
| 1405 | + struct intel_engine_capture_vma *this = capture; |
---|
| 1406 | + struct i915_vma *vma = this->vma; |
---|
| 1407 | + |
---|
| 1408 | + add_vma(ee, |
---|
| 1409 | + i915_vma_coredump_create(engine->gt, |
---|
| 1410 | + vma, this->name, |
---|
| 1411 | + compress)); |
---|
| 1412 | + |
---|
| 1413 | + i915_active_release(&vma->active); |
---|
| 1414 | + |
---|
| 1415 | + capture = this->next; |
---|
| 1416 | + kfree(this); |
---|
| 1417 | + } |
---|
| 1418 | + |
---|
| 1419 | + add_vma(ee, |
---|
| 1420 | + i915_vma_coredump_create(engine->gt, |
---|
| 1421 | + engine->status_page.vma, |
---|
| 1422 | + "HW Status", |
---|
| 1423 | + compress)); |
---|
| 1424 | + |
---|
| 1425 | + add_vma(ee, |
---|
| 1426 | + i915_vma_coredump_create(engine->gt, |
---|
| 1427 | + engine->wa_ctx.vma, |
---|
| 1428 | + "WA context", |
---|
| 1429 | + compress)); |
---|
| 1430 | +} |
---|
| 1431 | + |
---|
| 1432 | +static struct intel_engine_coredump * |
---|
| 1433 | +capture_engine(struct intel_engine_cs *engine, |
---|
| 1434 | + struct i915_vma_compress *compress) |
---|
| 1435 | +{ |
---|
| 1436 | + struct intel_engine_capture_vma *capture = NULL; |
---|
| 1437 | + struct intel_engine_coredump *ee; |
---|
| 1438 | + struct i915_request *rq; |
---|
| 1439 | + unsigned long flags; |
---|
| 1440 | + |
---|
| 1441 | + ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); |
---|
| 1442 | + if (!ee) |
---|
| 1443 | + return NULL; |
---|
| 1444 | + |
---|
| 1445 | + spin_lock_irqsave(&engine->active.lock, flags); |
---|
| 1446 | + rq = intel_engine_find_active_request(engine); |
---|
| 1447 | + if (rq) |
---|
| 1448 | + capture = intel_engine_coredump_add_request(ee, rq, |
---|
| 1449 | + ATOMIC_MAYFAIL); |
---|
| 1450 | + spin_unlock_irqrestore(&engine->active.lock, flags); |
---|
| 1451 | + if (!capture) { |
---|
| 1452 | + kfree(ee); |
---|
1447 | 1453 | return NULL; |
---|
1448 | 1454 | } |
---|
| 1455 | + |
---|
| 1456 | + intel_engine_coredump_add_vma(ee, capture, compress); |
---|
| 1457 | + |
---|
| 1458 | + return ee; |
---|
1449 | 1459 | } |
---|
1450 | 1460 | |
---|
1451 | | -static void gem_record_rings(struct i915_gpu_state *error) |
---|
| 1461 | +static void |
---|
| 1462 | +gt_record_engines(struct intel_gt_coredump *gt, |
---|
| 1463 | + struct i915_vma_compress *compress) |
---|
1452 | 1464 | { |
---|
1453 | | - struct drm_i915_private *i915 = error->i915; |
---|
1454 | | - struct i915_ggtt *ggtt = &i915->ggtt; |
---|
1455 | | - int i; |
---|
| 1465 | + struct intel_engine_cs *engine; |
---|
| 1466 | + enum intel_engine_id id; |
---|
1456 | 1467 | |
---|
1457 | | - for (i = 0; i < I915_NUM_ENGINES; i++) { |
---|
1458 | | - struct intel_engine_cs *engine = i915->engine[i]; |
---|
1459 | | - struct drm_i915_error_engine *ee = &error->engine[i]; |
---|
1460 | | - struct i915_request *request; |
---|
| 1468 | + for_each_engine(engine, gt->_gt, id) { |
---|
| 1469 | + struct intel_engine_coredump *ee; |
---|
1461 | 1470 | |
---|
1462 | | - ee->engine_id = -1; |
---|
| 1471 | + /* Refill our page pool before entering atomic section */ |
---|
| 1472 | + pool_refill(&compress->pool, ALLOW_FAIL); |
---|
1463 | 1473 | |
---|
1464 | | - if (!engine) |
---|
| 1474 | + ee = capture_engine(engine, compress); |
---|
| 1475 | + if (!ee) |
---|
1465 | 1476 | continue; |
---|
1466 | 1477 | |
---|
1467 | | - ee->engine_id = i; |
---|
1468 | | - |
---|
1469 | | - error_record_engine_registers(error, engine, ee); |
---|
1470 | | - error_record_engine_waiters(engine, ee); |
---|
1471 | | - error_record_engine_execlists(engine, ee); |
---|
1472 | | - |
---|
1473 | | - request = i915_gem_find_active_request(engine); |
---|
1474 | | - if (request) { |
---|
1475 | | - struct i915_gem_context *ctx = request->gem_context; |
---|
1476 | | - struct intel_ring *ring; |
---|
1477 | | - |
---|
1478 | | - ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm; |
---|
1479 | | - |
---|
1480 | | - record_context(&ee->context, ctx); |
---|
1481 | | - |
---|
1482 | | - /* We need to copy these to an anonymous buffer |
---|
1483 | | - * as the simplest method to avoid being overwritten |
---|
1484 | | - * by userspace. |
---|
1485 | | - */ |
---|
1486 | | - ee->batchbuffer = |
---|
1487 | | - i915_error_object_create(i915, request->batch); |
---|
1488 | | - |
---|
1489 | | - if (HAS_BROKEN_CS_TLB(i915)) |
---|
1490 | | - ee->wa_batchbuffer = |
---|
1491 | | - i915_error_object_create(i915, |
---|
1492 | | - engine->scratch); |
---|
1493 | | - request_record_user_bo(request, ee); |
---|
1494 | | - |
---|
1495 | | - ee->ctx = |
---|
1496 | | - i915_error_object_create(i915, |
---|
1497 | | - request->hw_context->state); |
---|
1498 | | - |
---|
1499 | | - error->simulated |= |
---|
1500 | | - i915_gem_context_no_error_capture(ctx); |
---|
1501 | | - |
---|
1502 | | - ee->rq_head = request->head; |
---|
1503 | | - ee->rq_post = request->postfix; |
---|
1504 | | - ee->rq_tail = request->tail; |
---|
1505 | | - |
---|
1506 | | - ring = request->ring; |
---|
1507 | | - ee->cpu_ring_head = ring->head; |
---|
1508 | | - ee->cpu_ring_tail = ring->tail; |
---|
1509 | | - ee->ringbuffer = |
---|
1510 | | - i915_error_object_create(i915, ring->vma); |
---|
1511 | | - |
---|
1512 | | - engine_record_requests(engine, request, ee); |
---|
| 1478 | + gt->simulated |= ee->simulated; |
---|
| 1479 | + if (ee->simulated) { |
---|
| 1480 | + kfree(ee); |
---|
| 1481 | + continue; |
---|
1513 | 1482 | } |
---|
1514 | 1483 | |
---|
1515 | | - ee->hws_page = |
---|
1516 | | - i915_error_object_create(i915, |
---|
1517 | | - engine->status_page.vma); |
---|
1518 | | - |
---|
1519 | | - ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma); |
---|
1520 | | - |
---|
1521 | | - ee->default_state = capture_object(i915, engine->default_state); |
---|
| 1484 | + ee->next = gt->engine; |
---|
| 1485 | + gt->engine = ee; |
---|
1522 | 1486 | } |
---|
1523 | 1487 | } |
---|
1524 | 1488 | |
---|
1525 | | -static void gem_capture_vm(struct i915_gpu_state *error, |
---|
1526 | | - struct i915_address_space *vm, |
---|
1527 | | - int idx) |
---|
| 1489 | +static struct intel_uc_coredump * |
---|
| 1490 | +gt_record_uc(struct intel_gt_coredump *gt, |
---|
| 1491 | + struct i915_vma_compress *compress) |
---|
1528 | 1492 | { |
---|
1529 | | - struct drm_i915_error_buffer *active_bo; |
---|
1530 | | - struct i915_vma *vma; |
---|
1531 | | - int count; |
---|
| 1493 | + const struct intel_uc *uc = >->_gt->uc; |
---|
| 1494 | + struct intel_uc_coredump *error_uc; |
---|
1532 | 1495 | |
---|
1533 | | - count = 0; |
---|
1534 | | - list_for_each_entry(vma, &vm->active_list, vm_link) |
---|
1535 | | - count++; |
---|
| 1496 | + error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL); |
---|
| 1497 | + if (!error_uc) |
---|
| 1498 | + return NULL; |
---|
1536 | 1499 | |
---|
1537 | | - active_bo = NULL; |
---|
1538 | | - if (count) |
---|
1539 | | - active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); |
---|
1540 | | - if (active_bo) |
---|
1541 | | - count = capture_error_bo(active_bo, count, &vm->active_list, false); |
---|
1542 | | - else |
---|
1543 | | - count = 0; |
---|
1544 | | - |
---|
1545 | | - error->active_vm[idx] = vm; |
---|
1546 | | - error->active_bo[idx] = active_bo; |
---|
1547 | | - error->active_bo_count[idx] = count; |
---|
1548 | | -} |
---|
1549 | | - |
---|
1550 | | -static void capture_active_buffers(struct i915_gpu_state *error) |
---|
1551 | | -{ |
---|
1552 | | - int cnt = 0, i, j; |
---|
1553 | | - |
---|
1554 | | - BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo)); |
---|
1555 | | - BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm)); |
---|
1556 | | - BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count)); |
---|
1557 | | - |
---|
1558 | | - /* Scan each engine looking for unique active contexts/vm */ |
---|
1559 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
---|
1560 | | - struct drm_i915_error_engine *ee = &error->engine[i]; |
---|
1561 | | - bool found; |
---|
1562 | | - |
---|
1563 | | - if (!ee->vm) |
---|
1564 | | - continue; |
---|
1565 | | - |
---|
1566 | | - found = false; |
---|
1567 | | - for (j = 0; j < i && !found; j++) |
---|
1568 | | - found = error->engine[j].vm == ee->vm; |
---|
1569 | | - if (!found) |
---|
1570 | | - gem_capture_vm(error, ee->vm, cnt++); |
---|
1571 | | - } |
---|
1572 | | -} |
---|
1573 | | - |
---|
1574 | | -static void capture_pinned_buffers(struct i915_gpu_state *error) |
---|
1575 | | -{ |
---|
1576 | | - struct i915_address_space *vm = &error->i915->ggtt.vm; |
---|
1577 | | - struct drm_i915_error_buffer *bo; |
---|
1578 | | - struct i915_vma *vma; |
---|
1579 | | - int count_inactive, count_active; |
---|
1580 | | - |
---|
1581 | | - count_inactive = 0; |
---|
1582 | | - list_for_each_entry(vma, &vm->inactive_list, vm_link) |
---|
1583 | | - count_inactive++; |
---|
1584 | | - |
---|
1585 | | - count_active = 0; |
---|
1586 | | - list_for_each_entry(vma, &vm->active_list, vm_link) |
---|
1587 | | - count_active++; |
---|
1588 | | - |
---|
1589 | | - bo = NULL; |
---|
1590 | | - if (count_inactive + count_active) |
---|
1591 | | - bo = kcalloc(count_inactive + count_active, |
---|
1592 | | - sizeof(*bo), GFP_ATOMIC); |
---|
1593 | | - if (!bo) |
---|
1594 | | - return; |
---|
1595 | | - |
---|
1596 | | - count_inactive = capture_error_bo(bo, count_inactive, |
---|
1597 | | - &vm->active_list, true); |
---|
1598 | | - count_active = capture_error_bo(bo + count_inactive, count_active, |
---|
1599 | | - &vm->inactive_list, true); |
---|
1600 | | - error->pinned_bo_count = count_inactive + count_active; |
---|
1601 | | - error->pinned_bo = bo; |
---|
1602 | | -} |
---|
1603 | | - |
---|
1604 | | -static void capture_uc_state(struct i915_gpu_state *error) |
---|
1605 | | -{ |
---|
1606 | | - struct drm_i915_private *i915 = error->i915; |
---|
1607 | | - struct i915_error_uc *error_uc = &error->uc; |
---|
1608 | | - |
---|
1609 | | - /* Capturing uC state won't be useful if there is no GuC */ |
---|
1610 | | - if (!error->device_info.has_guc) |
---|
1611 | | - return; |
---|
1612 | | - |
---|
1613 | | - error_uc->guc_fw = i915->guc.fw; |
---|
1614 | | - error_uc->huc_fw = i915->huc.fw; |
---|
| 1500 | + memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw)); |
---|
| 1501 | + memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw)); |
---|
1615 | 1502 | |
---|
1616 | 1503 | /* Non-default firmware paths will be specified by the modparam. |
---|
1617 | 1504 | * As modparams are generally accesible from the userspace make |
---|
1618 | 1505 | * explicit copies of the firmware paths. |
---|
1619 | 1506 | */ |
---|
1620 | | - error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); |
---|
1621 | | - error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); |
---|
1622 | | - error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma); |
---|
| 1507 | + error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); |
---|
| 1508 | + error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); |
---|
| 1509 | + error_uc->guc_log = |
---|
| 1510 | + i915_vma_coredump_create(gt->_gt, |
---|
| 1511 | + uc->guc.log.vma, "GuC log buffer", |
---|
| 1512 | + compress); |
---|
| 1513 | + |
---|
| 1514 | + return error_uc; |
---|
| 1515 | +} |
---|
| 1516 | + |
---|
| 1517 | +static void gt_capture_prepare(struct intel_gt_coredump *gt) |
---|
| 1518 | +{ |
---|
| 1519 | + struct i915_ggtt *ggtt = gt->_gt->ggtt; |
---|
| 1520 | + |
---|
| 1521 | + mutex_lock(&ggtt->error_mutex); |
---|
| 1522 | +} |
---|
| 1523 | + |
---|
| 1524 | +static void gt_capture_finish(struct intel_gt_coredump *gt) |
---|
| 1525 | +{ |
---|
| 1526 | + struct i915_ggtt *ggtt = gt->_gt->ggtt; |
---|
| 1527 | + |
---|
| 1528 | + if (drm_mm_node_allocated(&ggtt->error_capture)) |
---|
| 1529 | + ggtt->vm.clear_range(&ggtt->vm, |
---|
| 1530 | + ggtt->error_capture.start, |
---|
| 1531 | + PAGE_SIZE); |
---|
| 1532 | + |
---|
| 1533 | + mutex_unlock(&ggtt->error_mutex); |
---|
1623 | 1534 | } |
---|
1624 | 1535 | |
---|
1625 | 1536 | /* Capture all registers which don't fit into another category. */ |
---|
1626 | | -static void capture_reg_state(struct i915_gpu_state *error) |
---|
| 1537 | +static void gt_record_regs(struct intel_gt_coredump *gt) |
---|
1627 | 1538 | { |
---|
1628 | | - struct drm_i915_private *dev_priv = error->i915; |
---|
| 1539 | + struct intel_uncore *uncore = gt->_gt->uncore; |
---|
| 1540 | + struct drm_i915_private *i915 = uncore->i915; |
---|
1629 | 1541 | int i; |
---|
1630 | 1542 | |
---|
1631 | | - /* General organization |
---|
| 1543 | + /* |
---|
| 1544 | + * General organization |
---|
1632 | 1545 | * 1. Registers specific to a single generation |
---|
1633 | 1546 | * 2. Registers which belong to multiple generations |
---|
1634 | 1547 | * 3. Feature specific registers. |
---|
.. | .. |
---|
1637 | 1550 | */ |
---|
1638 | 1551 | |
---|
1639 | 1552 | /* 1: Registers specific to a single generation */ |
---|
1640 | | - if (IS_VALLEYVIEW(dev_priv)) { |
---|
1641 | | - error->gtier[0] = I915_READ(GTIER); |
---|
1642 | | - error->ier = I915_READ(VLV_IER); |
---|
1643 | | - error->forcewake = I915_READ_FW(FORCEWAKE_VLV); |
---|
| 1553 | + if (IS_VALLEYVIEW(i915)) { |
---|
| 1554 | + gt->gtier[0] = intel_uncore_read(uncore, GTIER); |
---|
| 1555 | + gt->ier = intel_uncore_read(uncore, VLV_IER); |
---|
| 1556 | + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); |
---|
1644 | 1557 | } |
---|
1645 | 1558 | |
---|
1646 | | - if (IS_GEN7(dev_priv)) |
---|
1647 | | - error->err_int = I915_READ(GEN7_ERR_INT); |
---|
| 1559 | + if (IS_GEN(i915, 7)) |
---|
| 1560 | + gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); |
---|
1648 | 1561 | |
---|
1649 | | - if (INTEL_GEN(dev_priv) >= 8) { |
---|
1650 | | - error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0); |
---|
1651 | | - error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1); |
---|
| 1562 | + if (INTEL_GEN(i915) >= 12) { |
---|
| 1563 | + gt->fault_data0 = intel_uncore_read(uncore, |
---|
| 1564 | + GEN12_FAULT_TLB_DATA0); |
---|
| 1565 | + gt->fault_data1 = intel_uncore_read(uncore, |
---|
| 1566 | + GEN12_FAULT_TLB_DATA1); |
---|
| 1567 | + } else if (INTEL_GEN(i915) >= 8) { |
---|
| 1568 | + gt->fault_data0 = intel_uncore_read(uncore, |
---|
| 1569 | + GEN8_FAULT_TLB_DATA0); |
---|
| 1570 | + gt->fault_data1 = intel_uncore_read(uncore, |
---|
| 1571 | + GEN8_FAULT_TLB_DATA1); |
---|
1652 | 1572 | } |
---|
1653 | 1573 | |
---|
1654 | | - if (IS_GEN6(dev_priv)) { |
---|
1655 | | - error->forcewake = I915_READ_FW(FORCEWAKE); |
---|
1656 | | - error->gab_ctl = I915_READ(GAB_CTL); |
---|
1657 | | - error->gfx_mode = I915_READ(GFX_MODE); |
---|
| 1574 | + if (IS_GEN(i915, 6)) { |
---|
| 1575 | + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE); |
---|
| 1576 | + gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL); |
---|
| 1577 | + gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE); |
---|
1658 | 1578 | } |
---|
1659 | 1579 | |
---|
1660 | 1580 | /* 2: Registers which belong to multiple generations */ |
---|
1661 | | - if (INTEL_GEN(dev_priv) >= 7) |
---|
1662 | | - error->forcewake = I915_READ_FW(FORCEWAKE_MT); |
---|
| 1581 | + if (INTEL_GEN(i915) >= 7) |
---|
| 1582 | + gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); |
---|
1663 | 1583 | |
---|
1664 | | - if (INTEL_GEN(dev_priv) >= 6) { |
---|
1665 | | - error->derrmr = I915_READ(DERRMR); |
---|
1666 | | - error->error = I915_READ(ERROR_GEN6); |
---|
1667 | | - error->done_reg = I915_READ(DONE_REG); |
---|
| 1584 | + if (INTEL_GEN(i915) >= 6) { |
---|
| 1585 | + gt->derrmr = intel_uncore_read(uncore, DERRMR); |
---|
| 1586 | + if (INTEL_GEN(i915) < 12) { |
---|
| 1587 | + gt->error = intel_uncore_read(uncore, ERROR_GEN6); |
---|
| 1588 | + gt->done_reg = intel_uncore_read(uncore, DONE_REG); |
---|
| 1589 | + } |
---|
1668 | 1590 | } |
---|
1669 | 1591 | |
---|
1670 | | - if (INTEL_GEN(dev_priv) >= 5) |
---|
1671 | | - error->ccid = I915_READ(CCID); |
---|
1672 | | - |
---|
1673 | 1592 | /* 3: Feature specific registers */ |
---|
1674 | | - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { |
---|
1675 | | - error->gam_ecochk = I915_READ(GAM_ECOCHK); |
---|
1676 | | - error->gac_eco = I915_READ(GAC_ECO_BITS); |
---|
| 1593 | + if (IS_GEN_RANGE(i915, 6, 7)) { |
---|
| 1594 | + gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK); |
---|
| 1595 | + gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); |
---|
| 1596 | + } |
---|
| 1597 | + |
---|
| 1598 | + if (IS_GEN_RANGE(i915, 8, 11)) |
---|
| 1599 | + gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); |
---|
| 1600 | + |
---|
| 1601 | + if (IS_GEN(i915, 12)) |
---|
| 1602 | + gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); |
---|
| 1603 | + |
---|
| 1604 | + if (INTEL_GEN(i915) >= 12) { |
---|
| 1605 | + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { |
---|
| 1606 | + /* |
---|
| 1607 | + * SFC_DONE resides in the VD forcewake domain, so it |
---|
| 1608 | + * only exists if the corresponding VCS engine is |
---|
| 1609 | + * present. |
---|
| 1610 | + */ |
---|
| 1611 | + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) |
---|
| 1612 | + continue; |
---|
| 1613 | + |
---|
| 1614 | + gt->sfc_done[i] = |
---|
| 1615 | + intel_uncore_read(uncore, GEN12_SFC_DONE(i)); |
---|
| 1616 | + } |
---|
| 1617 | + |
---|
| 1618 | + gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); |
---|
1677 | 1619 | } |
---|
1678 | 1620 | |
---|
1679 | 1621 | /* 4: Everything else */ |
---|
1680 | | - if (INTEL_GEN(dev_priv) >= 11) { |
---|
1681 | | - error->ier = I915_READ(GEN8_DE_MISC_IER); |
---|
1682 | | - error->gtier[0] = I915_READ(GEN11_RENDER_COPY_INTR_ENABLE); |
---|
1683 | | - error->gtier[1] = I915_READ(GEN11_VCS_VECS_INTR_ENABLE); |
---|
1684 | | - error->gtier[2] = I915_READ(GEN11_GUC_SG_INTR_ENABLE); |
---|
1685 | | - error->gtier[3] = I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE); |
---|
1686 | | - error->gtier[4] = I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE); |
---|
1687 | | - error->gtier[5] = I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE); |
---|
1688 | | - error->ngtier = 6; |
---|
1689 | | - } else if (INTEL_GEN(dev_priv) >= 8) { |
---|
1690 | | - error->ier = I915_READ(GEN8_DE_MISC_IER); |
---|
| 1622 | + if (INTEL_GEN(i915) >= 11) { |
---|
| 1623 | + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); |
---|
| 1624 | + gt->gtier[0] = |
---|
| 1625 | + intel_uncore_read(uncore, |
---|
| 1626 | + GEN11_RENDER_COPY_INTR_ENABLE); |
---|
| 1627 | + gt->gtier[1] = |
---|
| 1628 | + intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE); |
---|
| 1629 | + gt->gtier[2] = |
---|
| 1630 | + intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE); |
---|
| 1631 | + gt->gtier[3] = |
---|
| 1632 | + intel_uncore_read(uncore, |
---|
| 1633 | + GEN11_GPM_WGBOXPERF_INTR_ENABLE); |
---|
| 1634 | + gt->gtier[4] = |
---|
| 1635 | + intel_uncore_read(uncore, |
---|
| 1636 | + GEN11_CRYPTO_RSVD_INTR_ENABLE); |
---|
| 1637 | + gt->gtier[5] = |
---|
| 1638 | + intel_uncore_read(uncore, |
---|
| 1639 | + GEN11_GUNIT_CSME_INTR_ENABLE); |
---|
| 1640 | + gt->ngtier = 6; |
---|
| 1641 | + } else if (INTEL_GEN(i915) >= 8) { |
---|
| 1642 | + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); |
---|
1691 | 1643 | for (i = 0; i < 4; i++) |
---|
1692 | | - error->gtier[i] = I915_READ(GEN8_GT_IER(i)); |
---|
1693 | | - error->ngtier = 4; |
---|
1694 | | - } else if (HAS_PCH_SPLIT(dev_priv)) { |
---|
1695 | | - error->ier = I915_READ(DEIER); |
---|
1696 | | - error->gtier[0] = I915_READ(GTIER); |
---|
1697 | | - error->ngtier = 1; |
---|
1698 | | - } else if (IS_GEN2(dev_priv)) { |
---|
1699 | | - error->ier = I915_READ16(IER); |
---|
1700 | | - } else if (!IS_VALLEYVIEW(dev_priv)) { |
---|
1701 | | - error->ier = I915_READ(IER); |
---|
| 1644 | + gt->gtier[i] = |
---|
| 1645 | + intel_uncore_read(uncore, GEN8_GT_IER(i)); |
---|
| 1646 | + gt->ngtier = 4; |
---|
| 1647 | + } else if (HAS_PCH_SPLIT(i915)) { |
---|
| 1648 | + gt->ier = intel_uncore_read(uncore, DEIER); |
---|
| 1649 | + gt->gtier[0] = intel_uncore_read(uncore, GTIER); |
---|
| 1650 | + gt->ngtier = 1; |
---|
| 1651 | + } else if (IS_GEN(i915, 2)) { |
---|
| 1652 | + gt->ier = intel_uncore_read16(uncore, GEN2_IER); |
---|
| 1653 | + } else if (!IS_VALLEYVIEW(i915)) { |
---|
| 1654 | + gt->ier = intel_uncore_read(uncore, GEN2_IER); |
---|
1702 | 1655 | } |
---|
1703 | | - error->eir = I915_READ(EIR); |
---|
1704 | | - error->pgtbl_er = I915_READ(PGTBL_ER); |
---|
| 1656 | + gt->eir = intel_uncore_read(uncore, EIR); |
---|
| 1657 | + gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); |
---|
1705 | 1658 | } |
---|
1706 | 1659 | |
---|
1707 | | -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, |
---|
1708 | | - struct i915_gpu_state *error, |
---|
1709 | | - u32 engine_mask, |
---|
1710 | | - const char *error_msg) |
---|
| 1660 | +static void gt_record_info(struct intel_gt_coredump *gt) |
---|
1711 | 1661 | { |
---|
1712 | | - u32 ecode; |
---|
1713 | | - int engine_id = -1, len; |
---|
| 1662 | + memcpy(>->info, >->_gt->info, sizeof(struct intel_gt_info)); |
---|
| 1663 | +} |
---|
1714 | 1664 | |
---|
1715 | | - ecode = i915_error_generate_code(dev_priv, error, &engine_id); |
---|
| 1665 | +/* |
---|
| 1666 | + * Generate a semi-unique error code. The code is not meant to have meaning, The |
---|
| 1667 | + * code's only purpose is to try to prevent false duplicated bug reports by |
---|
| 1668 | + * grossly estimating a GPU error state. |
---|
| 1669 | + * |
---|
| 1670 | + * TODO Ideally, hashing the batchbuffer would be a very nice way to determine |
---|
| 1671 | + * the hang if we could strip the GTT offset information from it. |
---|
| 1672 | + * |
---|
| 1673 | + * It's only a small step better than a random number in its current form. |
---|
| 1674 | + */ |
---|
| 1675 | +static u32 generate_ecode(const struct intel_engine_coredump *ee) |
---|
| 1676 | +{ |
---|
| 1677 | + /* |
---|
| 1678 | + * IPEHR would be an ideal way to detect errors, as it's the gross |
---|
| 1679 | + * measure of "the command that hung." However, has some very common |
---|
| 1680 | + * synchronization commands which almost always appear in the case |
---|
| 1681 | + * strictly a client bug. Use instdone to differentiate those some. |
---|
| 1682 | + */ |
---|
| 1683 | + return ee ? ee->ipehr ^ ee->instdone.instdone : 0; |
---|
| 1684 | +} |
---|
| 1685 | + |
---|
| 1686 | +static const char *error_msg(struct i915_gpu_coredump *error) |
---|
| 1687 | +{ |
---|
| 1688 | + struct intel_engine_coredump *first = NULL; |
---|
| 1689 | + struct intel_gt_coredump *gt; |
---|
| 1690 | + intel_engine_mask_t engines; |
---|
| 1691 | + int len; |
---|
| 1692 | + |
---|
| 1693 | + engines = 0; |
---|
| 1694 | + for (gt = error->gt; gt; gt = gt->next) { |
---|
| 1695 | + struct intel_engine_coredump *cs; |
---|
| 1696 | + |
---|
| 1697 | + if (gt->engine && !first) |
---|
| 1698 | + first = gt->engine; |
---|
| 1699 | + |
---|
| 1700 | + for (cs = gt->engine; cs; cs = cs->next) |
---|
| 1701 | + engines |= cs->engine->mask; |
---|
| 1702 | + } |
---|
1716 | 1703 | |
---|
1717 | 1704 | len = scnprintf(error->error_msg, sizeof(error->error_msg), |
---|
1718 | | - "GPU HANG: ecode %d:%d:0x%08x", |
---|
1719 | | - INTEL_GEN(dev_priv), engine_id, ecode); |
---|
1720 | | - |
---|
1721 | | - if (engine_id != -1 && error->engine[engine_id].context.pid) |
---|
| 1705 | + "GPU HANG: ecode %d:%x:%08x", |
---|
| 1706 | + INTEL_GEN(error->i915), engines, |
---|
| 1707 | + generate_ecode(first)); |
---|
| 1708 | + if (first && first->context.pid) { |
---|
| 1709 | + /* Just show the first executing process, more is confusing */ |
---|
1722 | 1710 | len += scnprintf(error->error_msg + len, |
---|
1723 | 1711 | sizeof(error->error_msg) - len, |
---|
1724 | 1712 | ", in %s [%d]", |
---|
1725 | | - error->engine[engine_id].context.comm, |
---|
1726 | | - error->engine[engine_id].context.pid); |
---|
| 1713 | + first->context.comm, first->context.pid); |
---|
| 1714 | + } |
---|
1727 | 1715 | |
---|
1728 | | - scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, |
---|
1729 | | - ", reason: %s, action: %s", |
---|
1730 | | - error_msg, |
---|
1731 | | - engine_mask ? "reset" : "continue"); |
---|
| 1716 | + return error->error_msg; |
---|
1732 | 1717 | } |
---|
1733 | 1718 | |
---|
1734 | | -static void capture_gen_state(struct i915_gpu_state *error) |
---|
| 1719 | +static void capture_gen(struct i915_gpu_coredump *error) |
---|
1735 | 1720 | { |
---|
1736 | 1721 | struct drm_i915_private *i915 = error->i915; |
---|
1737 | 1722 | |
---|
1738 | | - error->awake = i915->gt.awake; |
---|
1739 | 1723 | error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count); |
---|
1740 | 1724 | error->suspended = i915->runtime_pm.suspended; |
---|
1741 | 1725 | |
---|
.. | .. |
---|
1746 | 1730 | error->reset_count = i915_reset_count(&i915->gpu_error); |
---|
1747 | 1731 | error->suspend_count = i915->suspend_count; |
---|
1748 | 1732 | |
---|
| 1733 | + i915_params_copy(&error->params, &i915->params); |
---|
1749 | 1734 | memcpy(&error->device_info, |
---|
1750 | 1735 | INTEL_INFO(i915), |
---|
1751 | 1736 | sizeof(error->device_info)); |
---|
| 1737 | + memcpy(&error->runtime_info, |
---|
| 1738 | + RUNTIME_INFO(i915), |
---|
| 1739 | + sizeof(error->runtime_info)); |
---|
1752 | 1740 | error->driver_caps = i915->caps; |
---|
1753 | 1741 | } |
---|
1754 | 1742 | |
---|
1755 | | -static __always_inline void dup_param(const char *type, void *x) |
---|
| 1743 | +struct i915_gpu_coredump * |
---|
| 1744 | +i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) |
---|
1756 | 1745 | { |
---|
1757 | | - if (!__builtin_strcmp(type, "char *")) |
---|
1758 | | - *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); |
---|
1759 | | -} |
---|
| 1746 | + struct i915_gpu_coredump *error; |
---|
1760 | 1747 | |
---|
1761 | | -static void capture_params(struct i915_gpu_state *error) |
---|
1762 | | -{ |
---|
1763 | | - error->params = i915_modparams; |
---|
1764 | | -#define DUP(T, x, ...) dup_param(#T, &error->params.x); |
---|
1765 | | - I915_PARAMS_FOR_EACH(DUP); |
---|
1766 | | -#undef DUP |
---|
1767 | | -} |
---|
| 1748 | + if (!i915->params.error_capture) |
---|
| 1749 | + return NULL; |
---|
1768 | 1750 | |
---|
1769 | | -static unsigned long capture_find_epoch(const struct i915_gpu_state *error) |
---|
1770 | | -{ |
---|
1771 | | - unsigned long epoch = error->capture; |
---|
1772 | | - int i; |
---|
1773 | | - |
---|
1774 | | - for (i = 0; i < ARRAY_SIZE(error->engine); i++) { |
---|
1775 | | - const struct drm_i915_error_engine *ee = &error->engine[i]; |
---|
1776 | | - |
---|
1777 | | - if (ee->hangcheck_stalled && |
---|
1778 | | - time_before(ee->hangcheck_timestamp, epoch)) |
---|
1779 | | - epoch = ee->hangcheck_timestamp; |
---|
1780 | | - } |
---|
1781 | | - |
---|
1782 | | - return epoch; |
---|
1783 | | -} |
---|
1784 | | - |
---|
1785 | | -static int capture(void *data) |
---|
1786 | | -{ |
---|
1787 | | - struct i915_gpu_state *error = data; |
---|
1788 | | - |
---|
1789 | | - error->time = ktime_get_real(); |
---|
1790 | | - error->boottime = ktime_get_boottime(); |
---|
1791 | | - error->uptime = ktime_sub(ktime_get(), |
---|
1792 | | - error->i915->gt.last_init_time); |
---|
1793 | | - error->capture = jiffies; |
---|
1794 | | - |
---|
1795 | | - capture_params(error); |
---|
1796 | | - capture_gen_state(error); |
---|
1797 | | - capture_uc_state(error); |
---|
1798 | | - capture_reg_state(error); |
---|
1799 | | - gem_record_fences(error); |
---|
1800 | | - gem_record_rings(error); |
---|
1801 | | - capture_active_buffers(error); |
---|
1802 | | - capture_pinned_buffers(error); |
---|
1803 | | - |
---|
1804 | | - error->overlay = intel_overlay_capture_error_state(error->i915); |
---|
1805 | | - error->display = intel_display_capture_error_state(error->i915); |
---|
1806 | | - |
---|
1807 | | - error->epoch = capture_find_epoch(error); |
---|
1808 | | - |
---|
1809 | | - return 0; |
---|
1810 | | -} |
---|
1811 | | - |
---|
1812 | | -#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) |
---|
1813 | | - |
---|
1814 | | -struct i915_gpu_state * |
---|
1815 | | -i915_capture_gpu_state(struct drm_i915_private *i915) |
---|
1816 | | -{ |
---|
1817 | | - struct i915_gpu_state *error; |
---|
1818 | | - |
---|
1819 | | - error = kzalloc(sizeof(*error), GFP_ATOMIC); |
---|
| 1751 | + error = kzalloc(sizeof(*error), gfp); |
---|
1820 | 1752 | if (!error) |
---|
1821 | 1753 | return NULL; |
---|
1822 | 1754 | |
---|
1823 | 1755 | kref_init(&error->ref); |
---|
1824 | 1756 | error->i915 = i915; |
---|
1825 | 1757 | |
---|
1826 | | - stop_machine(capture, error, NULL); |
---|
| 1758 | + error->time = ktime_get_real(); |
---|
| 1759 | + error->boottime = ktime_get_boottime(); |
---|
| 1760 | + error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time); |
---|
| 1761 | + error->capture = jiffies; |
---|
| 1762 | + |
---|
| 1763 | + capture_gen(error); |
---|
1827 | 1764 | |
---|
1828 | 1765 | return error; |
---|
| 1766 | +} |
---|
| 1767 | + |
---|
| 1768 | +#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) |
---|
| 1769 | + |
---|
| 1770 | +struct intel_gt_coredump * |
---|
| 1771 | +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) |
---|
| 1772 | +{ |
---|
| 1773 | + struct intel_gt_coredump *gc; |
---|
| 1774 | + |
---|
| 1775 | + gc = kzalloc(sizeof(*gc), gfp); |
---|
| 1776 | + if (!gc) |
---|
| 1777 | + return NULL; |
---|
| 1778 | + |
---|
| 1779 | + gc->_gt = gt; |
---|
| 1780 | + gc->awake = intel_gt_pm_is_awake(gt); |
---|
| 1781 | + |
---|
| 1782 | + gt_record_regs(gc); |
---|
| 1783 | + gt_record_fences(gc); |
---|
| 1784 | + |
---|
| 1785 | + return gc; |
---|
| 1786 | +} |
---|
| 1787 | + |
---|
| 1788 | +struct i915_vma_compress * |
---|
| 1789 | +i915_vma_capture_prepare(struct intel_gt_coredump *gt) |
---|
| 1790 | +{ |
---|
| 1791 | + struct i915_vma_compress *compress; |
---|
| 1792 | + |
---|
| 1793 | + compress = kmalloc(sizeof(*compress), ALLOW_FAIL); |
---|
| 1794 | + if (!compress) |
---|
| 1795 | + return NULL; |
---|
| 1796 | + |
---|
| 1797 | + if (!compress_init(compress)) { |
---|
| 1798 | + kfree(compress); |
---|
| 1799 | + return NULL; |
---|
| 1800 | + } |
---|
| 1801 | + |
---|
| 1802 | + gt_capture_prepare(gt); |
---|
| 1803 | + |
---|
| 1804 | + return compress; |
---|
| 1805 | +} |
---|
| 1806 | + |
---|
| 1807 | +void i915_vma_capture_finish(struct intel_gt_coredump *gt, |
---|
| 1808 | + struct i915_vma_compress *compress) |
---|
| 1809 | +{ |
---|
| 1810 | + if (!compress) |
---|
| 1811 | + return; |
---|
| 1812 | + |
---|
| 1813 | + gt_capture_finish(gt); |
---|
| 1814 | + |
---|
| 1815 | + compress_fini(compress); |
---|
| 1816 | + kfree(compress); |
---|
| 1817 | +} |
---|
| 1818 | + |
---|
| 1819 | +struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915) |
---|
| 1820 | +{ |
---|
| 1821 | + struct i915_gpu_coredump *error; |
---|
| 1822 | + |
---|
| 1823 | + /* Check if GPU capture has been disabled */ |
---|
| 1824 | + error = READ_ONCE(i915->gpu_error.first_error); |
---|
| 1825 | + if (IS_ERR(error)) |
---|
| 1826 | + return error; |
---|
| 1827 | + |
---|
| 1828 | + error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL); |
---|
| 1829 | + if (!error) |
---|
| 1830 | + return ERR_PTR(-ENOMEM); |
---|
| 1831 | + |
---|
| 1832 | + error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL); |
---|
| 1833 | + if (error->gt) { |
---|
| 1834 | + struct i915_vma_compress *compress; |
---|
| 1835 | + |
---|
| 1836 | + compress = i915_vma_capture_prepare(error->gt); |
---|
| 1837 | + if (!compress) { |
---|
| 1838 | + kfree(error->gt); |
---|
| 1839 | + kfree(error); |
---|
| 1840 | + return ERR_PTR(-ENOMEM); |
---|
| 1841 | + } |
---|
| 1842 | + |
---|
| 1843 | + gt_record_info(error->gt); |
---|
| 1844 | + gt_record_engines(error->gt, compress); |
---|
| 1845 | + |
---|
| 1846 | + if (INTEL_INFO(i915)->has_gt_uc) |
---|
| 1847 | + error->gt->uc = gt_record_uc(error->gt, compress); |
---|
| 1848 | + |
---|
| 1849 | + i915_vma_capture_finish(error->gt, compress); |
---|
| 1850 | + |
---|
| 1851 | + error->simulated |= error->gt->simulated; |
---|
| 1852 | + } |
---|
| 1853 | + |
---|
| 1854 | + error->overlay = intel_overlay_capture_error_state(i915); |
---|
| 1855 | + error->display = intel_display_capture_error_state(i915); |
---|
| 1856 | + |
---|
| 1857 | + return error; |
---|
| 1858 | +} |
---|
| 1859 | + |
---|
| 1860 | +void i915_error_state_store(struct i915_gpu_coredump *error) |
---|
| 1861 | +{ |
---|
| 1862 | + struct drm_i915_private *i915; |
---|
| 1863 | + static bool warned; |
---|
| 1864 | + |
---|
| 1865 | + if (IS_ERR_OR_NULL(error)) |
---|
| 1866 | + return; |
---|
| 1867 | + |
---|
| 1868 | + i915 = error->i915; |
---|
| 1869 | + drm_info(&i915->drm, "%s\n", error_msg(error)); |
---|
| 1870 | + |
---|
| 1871 | + if (error->simulated || |
---|
| 1872 | + cmpxchg(&i915->gpu_error.first_error, NULL, error)) |
---|
| 1873 | + return; |
---|
| 1874 | + |
---|
| 1875 | + i915_gpu_coredump_get(error); |
---|
| 1876 | + |
---|
| 1877 | + if (!xchg(&warned, true) && |
---|
| 1878 | + ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { |
---|
| 1879 | + pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); |
---|
| 1880 | + pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n"); |
---|
| 1881 | + pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n"); |
---|
| 1882 | + pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); |
---|
| 1883 | + pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); |
---|
| 1884 | + pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", |
---|
| 1885 | + i915->drm.primary->index); |
---|
| 1886 | + } |
---|
1829 | 1887 | } |
---|
1830 | 1888 | |
---|
1831 | 1889 | /** |
---|
1832 | 1890 | * i915_capture_error_state - capture an error record for later analysis |
---|
1833 | 1891 | * @i915: i915 device |
---|
1834 | | - * @engine_mask: the mask of engines triggering the hang |
---|
1835 | | - * @error_msg: a message to insert into the error capture header |
---|
1836 | 1892 | * |
---|
1837 | 1893 | * Should be called when an error is detected (either a hang or an error |
---|
1838 | 1894 | * interrupt) to capture error state from the time of the error. Fills |
---|
1839 | 1895 | * out a structure which becomes available in debugfs for user level tools |
---|
1840 | 1896 | * to pick up. |
---|
1841 | 1897 | */ |
---|
1842 | | -void i915_capture_error_state(struct drm_i915_private *i915, |
---|
1843 | | - u32 engine_mask, |
---|
1844 | | - const char *error_msg) |
---|
| 1898 | +void i915_capture_error_state(struct drm_i915_private *i915) |
---|
1845 | 1899 | { |
---|
1846 | | - static bool warned; |
---|
1847 | | - struct i915_gpu_state *error; |
---|
1848 | | - unsigned long flags; |
---|
| 1900 | + struct i915_gpu_coredump *error; |
---|
1849 | 1901 | |
---|
1850 | | - if (!i915_modparams.error_capture) |
---|
1851 | | - return; |
---|
1852 | | - |
---|
1853 | | - if (READ_ONCE(i915->gpu_error.first_error)) |
---|
1854 | | - return; |
---|
1855 | | - |
---|
1856 | | - error = i915_capture_gpu_state(i915); |
---|
1857 | | - if (!error) { |
---|
1858 | | - DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); |
---|
| 1902 | + error = i915_gpu_coredump(i915); |
---|
| 1903 | + if (IS_ERR(error)) { |
---|
| 1904 | + cmpxchg(&i915->gpu_error.first_error, NULL, error); |
---|
1859 | 1905 | return; |
---|
1860 | 1906 | } |
---|
1861 | 1907 | |
---|
1862 | | - i915_error_capture_msg(i915, error, engine_mask, error_msg); |
---|
1863 | | - DRM_INFO("%s\n", error->error_msg); |
---|
1864 | | - |
---|
1865 | | - if (!error->simulated) { |
---|
1866 | | - spin_lock_irqsave(&i915->gpu_error.lock, flags); |
---|
1867 | | - if (!i915->gpu_error.first_error) { |
---|
1868 | | - i915->gpu_error.first_error = error; |
---|
1869 | | - error = NULL; |
---|
1870 | | - } |
---|
1871 | | - spin_unlock_irqrestore(&i915->gpu_error.lock, flags); |
---|
1872 | | - } |
---|
1873 | | - |
---|
1874 | | - if (error) { |
---|
1875 | | - __i915_gpu_state_free(&error->ref); |
---|
1876 | | - return; |
---|
1877 | | - } |
---|
1878 | | - |
---|
1879 | | - if (!warned && |
---|
1880 | | - ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { |
---|
1881 | | - DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); |
---|
1882 | | - DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); |
---|
1883 | | - DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); |
---|
1884 | | - DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); |
---|
1885 | | - DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", |
---|
1886 | | - i915->drm.primary->index); |
---|
1887 | | - warned = true; |
---|
1888 | | - } |
---|
| 1908 | + i915_error_state_store(error); |
---|
| 1909 | + i915_gpu_coredump_put(error); |
---|
1889 | 1910 | } |
---|
1890 | 1911 | |
---|
/*
 * i915_first_error_state - acquire a reference to the first captured coredump
 * @i915: i915 device instance
 *
 * Returns the oldest captured GPU error dump with an extra reference taken
 * (the caller is responsible for dropping it with i915_gpu_coredump_put()).
 * May instead return NULL (nothing captured) or an ERR_PTR sentinel
 * (capture disabled) — callers must check with IS_ERR_OR_NULL().
 */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_coredump *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	/*
	 * first_error may hold an ERR_PTR marker rather than a real dump
	 * (see i915_disable_error_state()); only ref-count a real dump.
	 */
	if (!IS_ERR_OR_NULL(error))
		i915_gpu_coredump_get(error);
	spin_unlock_irq(&i915->gpu_error.lock);

	return error;
}
---|
1904 | 1925 | |
---|
/*
 * i915_reset_error_state - discard the currently stored error state
 * @i915: i915 device instance
 *
 * Release our reference to the stored coredump (if any) so that a later
 * hang can be captured anew.  The ERR_PTR(-ENODEV) sentinel is deliberately
 * left in place: once error capture has been disabled, it stays disabled.
 */
void i915_reset_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_coredump *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	if (error != ERR_PTR(-ENODEV)) /* if disabled, always disabled */
		i915->gpu_error.first_error = NULL;
	spin_unlock_irq(&i915->gpu_error.lock);

	/* first_error may have held an ERR_PTR sentinel, not a coredump */
	if (!IS_ERR_OR_NULL(error))
		i915_gpu_coredump_put(error);
}
---|
| 1939 | + |
---|
/*
 * i915_disable_error_state - mark GPU error capture as unavailable
 * @i915: i915 device instance
 * @err: negative errno explaining why capture is disabled
 *
 * Store an ERR_PTR(@err) sentinel in first_error so that readers see the
 * reason capture is unavailable instead of a dump.  A coredump that has
 * already been captured is never overwritten.
 */
void i915_disable_error_state(struct drm_i915_private *i915, int err)
{
	spin_lock_irq(&i915->gpu_error.lock);
	if (!i915->gpu_error.first_error)
		i915->gpu_error.first_error = ERR_PTR(err);
	spin_unlock_irq(&i915->gpu_error.lock);
}
---|