hc
2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,20 @@
 #include <device/mali_kbase_device.h>
 #include <backend/gpu/mali_kbase_instr_internal.h>
 
+static int wait_prfcnt_ready(struct kbase_device *kbdev)
+{
+	u32 loops;
+
+	for (loops = 0; loops < KBASE_PRFCNT_ACTIVE_MAX_LOOPS; loops++) {
+		const u32 prfcnt_active = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) &
+					  GPU_STATUS_PRFCNT_ACTIVE;
+		if (!prfcnt_active)
+			return 0;
+	}
+
+	dev_err(kbdev->dev, "PRFCNT_ACTIVE bit stuck\n");
+	return -EBUSY;
+}
 
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 				      struct kbase_context *kctx,
@@ -43,14 +57,20 @@
 
 	/* alignment failure */
 	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
-		goto out_err;
+		return err;
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
 	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
 		/* Instrumentation is already enabled */
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		goto out_err;
+		return err;
+	}
+
+	if (kbase_is_gpu_removed(kbdev)) {
+		/* GPU has been removed by Arbiter */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		return err;
 	}
 
 	/* Enable interrupt */
@@ -75,8 +95,18 @@
 	prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
 #endif
 
+	/* Wait until prfcnt config register can be written */
+	err = wait_prfcnt_ready(kbdev);
+	if (err)
+		return err;
+
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
 			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
+
+	/* Wait until prfcnt is disabled before writing configuration registers */
+	err = wait_prfcnt_ready(kbdev);
+	if (err)
+		return err;
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
 			enable->dump_buffer & 0xFFFFFFFF);
@@ -105,37 +135,68 @@
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
-	err = 0;
-
 	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx);
-	return err;
- out_err:
-	return err;
+	return 0;
+}
+
+static void kbasep_instr_hwc_disable_hw_prfcnt(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->hwcnt.lock);
+
+	if (kbase_is_gpu_removed(kbdev))
+		/* GPU has been removed by Arbiter */
+		return;
+
+	/* Disable interrupt */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
+
+	/* Wait until prfcnt config register can be written, then disable the counters.
+	 * Return value is ignored as we are disabling anyway.
+	 */
+	wait_prfcnt_ready(kbdev);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+	kbdev->hwcnt.addr_bytes = 0ULL;
 }
 
 int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 {
 	unsigned long flags, pm_flags;
-	int err = -EINVAL;
-	u32 irq_mask;
 	struct kbase_device *kbdev = kctx->kbdev;
 
 	while (1) {
 		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+			/* Instrumentation is in unrecoverable error state,
+			 * there is nothing for us to do.
+			 */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+			/* Already disabled, return no error. */
+			return 0;
+		}
+
 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
 			/* Instrumentation is not enabled */
 			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-			goto out;
+			return -EINVAL;
 		}
 
 		if (kbdev->hwcnt.kctx != kctx) {
 			/* Instrumentation has been setup for another context */
 			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-			goto out;
+			return -EINVAL;
 		}
 
 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
@@ -152,17 +213,7 @@
 		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
 		kbdev->hwcnt.backend.triggered = 0;
 
-		/* Disable interrupt */
-		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
-
-		/* Disable the counters */
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
-
-		kbdev->hwcnt.kctx = NULL;
-		kbdev->hwcnt.addr = 0ULL;
-		kbdev->hwcnt.addr_bytes = 0ULL;
+		kbasep_instr_hwc_disable_hw_prfcnt(kbdev);
 
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
@@ -170,9 +221,7 @@
 	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK",
 		kctx);
 
-	err = 0;
- out:
-	return err;
+	return 0;
 }
 
 int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
@@ -190,8 +239,13 @@
 
 	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
 		/* HW counters are disabled or another dump is ongoing, or we're
-		 * resetting
+		 * resetting, or we are in unrecoverable error state.
 		 */
+		goto unlock;
+	}
+
+	if (kbase_is_gpu_removed(kbdev)) {
+		/* GPU has been removed by Arbiter */
 		goto unlock;
 	}
 
@@ -200,6 +254,11 @@
 	/* Mark that we're dumping - the PF handler can signal that we faulted
 	 */
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Wait until prfcnt is ready to request dump */
+	err = wait_prfcnt_ready(kbdev);
+	if (err)
+		goto unlock;
 
 	/* Reconfigure the dump address */
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
@@ -216,11 +275,8 @@
 
 	dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx);
 
-	err = 0;
-
 unlock:
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-
 	return err;
 }
 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
@@ -255,6 +311,10 @@
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
+	/* If the state is in unrecoverable error, we already wake_up the waiter
+	 * and don't need to do any action when sample is done.
+	 */
+
 	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
 		kbdev->hwcnt.backend.triggered = 1;
 		wake_up(&kbdev->hwcnt.backend.wait);
@@ -283,6 +343,8 @@
 	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
 		err = -EINVAL;
 		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+		err = -EIO;
 	} else {
 		/* Dump done */
 		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
@@ -303,25 +365,75 @@
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
-	/* Check it's the context previously set up and we're not already
-	 * dumping
+	/* Check it's the context previously set up and we're not in IDLE
+	 * state.
 	 */
 	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
 							KBASE_INSTR_STATE_IDLE)
-		goto out;
+		goto unlock;
+
+	if (kbase_is_gpu_removed(kbdev)) {
+		/* GPU has been removed by Arbiter */
+		goto unlock;
+	}
+
+	/* Wait until prfcnt is ready to clear */
+	err = wait_prfcnt_ready(kbdev);
+	if (err)
+		goto unlock;
 
 	/* Clear the counters */
 	KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 					GPU_COMMAND_PRFCNT_CLEAR);
 
-	err = 0;
-
-out:
+unlock:
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 	return err;
 }
 KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+void kbase_instr_hwcnt_on_unrecoverable_error(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* If we are already in unrecoverable error state, early return. */
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		return;
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_UNRECOVERABLE_ERROR;
+
+	/* Need to disable HW if it's not disabled yet. */
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED)
+		kbasep_instr_hwc_disable_hw_prfcnt(kbdev);
+
+	/* Wake up any waiters. */
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_unrecoverable_error);
+
+void kbase_instr_hwcnt_on_before_reset(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* A reset is the only way to exit the unrecoverable error state */
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_UNRECOVERABLE_ERROR)
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_on_before_reset);
 
 int kbase_instr_backend_init(struct kbase_device *kbdev)
 {
@@ -336,12 +448,12 @@
 #ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
 	/* Use the build time option for the override default. */
 #if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
-	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
 #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
-	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
 #else
 	/* Default to primary */
-	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
+	kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
 #endif
 #endif
 	return 0;
@@ -361,8 +473,8 @@
 	 *
 	 * Valid inputs are the values accepted by the SET_SELECT bits of the
 	 * PRFCNT_CONFIG register as defined in the architecture specification.
-	 */
-	debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
+	 */
+	debugfs_create_u8("hwcnt_set_select", 0644,
 			  kbdev->mali_debugfs_directory,
 			  (u8 *)&kbdev->hwcnt.backend.override_counter_set);
 }