forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-31 f70575805708cabdedea7498aaa3f710fde4d920
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
 *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -27,10 +27,13 @@
 #include "mali_kbase_reset_gpu.h"
 #include "mali_kbase_ctx_sched.h"
 #include "device/mali_kbase_device.h"
+#include <mali_kbase_hwaccess_time.h>
 #include "backend/gpu/mali_kbase_pm_internal.h"
 #include "mali_kbase_csf_scheduler.h"
 #include "mmu/mali_kbase_mmu.h"
 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <backend/gpu/mali_kbase_model_linux.h>
+#include <csf/mali_kbase_csf_registers.h>
 
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -100,7 +103,7 @@
 
 #define CSF_GLB_REQ_CFG_MASK \
 (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
- GLB_REQ_CFG_PWROFF_TIMER_MASK)
+ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
 
 static inline u32 input_page_read(const u32 *const input, const u32 offset)
 {
@@ -115,15 +118,6 @@
 WARN_ON(offset % sizeof(u32));
 
 input[offset / sizeof(u32)] = value;
-}
-
-static inline void input_page_partial_write(u32 *const input, const u32 offset,
- u32 value, u32 mask)
-{
- WARN_ON(offset % sizeof(u32));
-
- input[offset / sizeof(u32)] =
- (input_page_read(input, offset) & ~mask) | (value & mask);
 }
 
 static inline u32 output_page_read(const u32 *const output, const u32 offset)
@@ -144,13 +138,13 @@
 /**
 * invent_memory_setup_entry() - Invent an "interface memory setup" section
 *
+ * @kbdev: Kbase device structure
+ *
 * Invent an "interface memory setup" section similar to one from a firmware
 * image. If successful the interface will be added to the
 * kbase_device:csf.firmware_interfaces list.
 *
 * Return: 0 if successful, negative error code on failure
- *
- * @kbdev: Kbase device structure
 */
 static int invent_memory_setup_entry(struct kbase_device *kbdev)
 {
@@ -201,9 +195,8 @@
 ginfo->stream_stride = 0;
 
 ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL);
- if (ginfo->streams == NULL) {
+ if (ginfo->streams == NULL)
 return -ENOMEM;
- }
 
 for (sid = 0; sid < ginfo->stream_num; ++sid) {
 struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid];
@@ -236,7 +229,8 @@
 iface->version = 1;
 iface->kbdev = kbdev;
 iface->features = 0;
- iface->prfcnt_size = 64;
+ iface->prfcnt_size =
+ GLB_PRFCNT_SIZE_HARDWARE_SIZE_SET(0, KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE);
 
 if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
 /* update rate=1, max event size = 1<<8 = 256 */
@@ -249,9 +243,8 @@
 iface->group_stride = 0;
 
 iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL);
- if (iface->groups == NULL) {
+ if (iface->groups == NULL)
 return -ENOMEM;
- }
 
 for (gid = 0; gid < iface->group_num; ++gid) {
 int err;
@@ -275,6 +268,18 @@
 
 
 void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 value)
+{
+ /* NO_MALI: Nothing to do here */
+}
+
+void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
+ u32 gpu_addr, u32 *value)
+{
+ /* NO_MALI: Nothing to do here */
+}
+
+void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
 u32 gpu_addr, u32 value)
 {
 /* NO_MALI: Nothing to do here */
@@ -379,37 +384,7 @@
 dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
 return val;
 }
-
-static void
-csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface,
- const u32 glb_req)
-{
- struct kbase_device *kbdev = iface->kbdev;
- u32 glb_ack = output_page_read(iface->output, GLB_ACK);
- /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of
- * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters.
- */
- if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) {
- /* NO_MALI only uses the first buffer in the ring buffer. */
- input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0);
- output_page_write(iface->output, GLB_PRFCNT_INSERT, 1);
- kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE);
- }
-
- /* Propagate enable masks to model if request to enable. */
- if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) {
- u32 tiler_en, l2_en, sc_en;
-
- tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN);
- l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN);
- sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN);
-
- /* NO_MALI platform enabled all CSHW counters by default. */
- kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en);
- kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en);
- kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en);
- }
-}
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_csg_output);
 
 void kbase_csf_firmware_global_input(
 const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -421,11 +396,20 @@
 input_page_write(iface->input, offset, value);
 
 if (offset == GLB_REQ) {
- csf_firmware_prfcnt_process(iface, value);
- /* NO_MALI: Immediately acknowledge requests */
- output_page_write(iface->output, GLB_ACK, value);
+ /* NO_MALI: Immediately acknowledge requests - except for PRFCNT_ENABLE
+ * and PRFCNT_SAMPLE. These will be processed along with the
+ * corresponding performance counter registers when the global doorbell
+ * is rung in order to emulate the performance counter sampling behavior
+ * of the real firmware.
+ */
+ const u32 ack = output_page_read(iface->output, GLB_ACK);
+ const u32 req_mask = ~(GLB_REQ_PRFCNT_ENABLE_MASK | GLB_REQ_PRFCNT_SAMPLE_MASK);
+ const u32 toggled = (value ^ ack) & req_mask;
+
+ output_page_write(iface->output, GLB_ACK, ack ^ toggled);
 }
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input);
 
 void kbase_csf_firmware_global_input_mask(
 const struct kbase_csf_global_iface *const iface, const u32 offset,
@@ -439,6 +423,7 @@
 /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */
 kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask));
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_input_mask);
 
 u32 kbase_csf_firmware_global_input_read(
 const struct kbase_csf_global_iface *const iface, const u32 offset)
@@ -459,6 +444,100 @@
 dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
 return val;
 }
+KBASE_EXPORT_TEST_API(kbase_csf_firmware_global_output);
+
+/**
+ * csf_doorbell_prfcnt() - Process CSF performance counter doorbell request
+ *
+ * @kbdev: An instance of the GPU platform device
+ */
+static void csf_doorbell_prfcnt(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *iface;
+ u32 req;
+ u32 ack;
+ u32 extract_index;
+
+ if (WARN_ON(!kbdev))
+ return;
+
+ iface = &kbdev->csf.global_iface;
+
+ req = input_page_read(iface->input, GLB_REQ);
+ ack = output_page_read(iface->output, GLB_ACK);
+ extract_index = input_page_read(iface->input, GLB_PRFCNT_EXTRACT);
+
+ /* Process enable bit toggle */
+ if ((req ^ ack) & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ if (req & GLB_REQ_PRFCNT_ENABLE_MASK) {
+ /* Reset insert index to zero on enable bit set */
+ output_page_write(iface->output, GLB_PRFCNT_INSERT, 0);
+ WARN_ON(extract_index != 0);
+ }
+ ack ^= GLB_REQ_PRFCNT_ENABLE_MASK;
+ }
+
+ /* Process sample request */
+ if ((req ^ ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) {
+ const u32 ring_size = GLB_PRFCNT_CONFIG_SIZE_GET(
+ input_page_read(iface->input, GLB_PRFCNT_CONFIG));
+ u32 insert_index = output_page_read(iface->output, GLB_PRFCNT_INSERT);
+
+ const bool prev_overflow = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK;
+ const bool prev_threshold = (req ^ ack) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK;
+
+ /* If ringbuffer is full toggle PRFCNT_OVERFLOW and skip sample */
+ if (insert_index - extract_index >= ring_size) {
+ WARN_ON(insert_index - extract_index > ring_size);
+ if (!prev_overflow)
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK;
+ } else {
+ struct gpu_model_prfcnt_en enable_maps = {
+ .fe = input_page_read(iface->input, GLB_PRFCNT_CSF_EN),
+ .tiler = input_page_read(iface->input, GLB_PRFCNT_TILER_EN),
+ .l2 = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN),
+ .shader = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN),
+ };
+
+ const u64 prfcnt_base =
+ input_page_read(iface->input, GLB_PRFCNT_BASE_LO) +
+ ((u64)input_page_read(iface->input, GLB_PRFCNT_BASE_HI) << 32);
+
+ u32 *sample_base = (u32 *)(uintptr_t)prfcnt_base +
+ (KBASE_DUMMY_MODEL_MAX_VALUES_PER_SAMPLE *
+ (insert_index % ring_size));
+
+ /* trigger sample dump in the dummy model */
+ gpu_model_prfcnt_dump_request(sample_base, enable_maps);
+
+ /* increment insert index and toggle PRFCNT_SAMPLE bit in ACK */
+ output_page_write(iface->output, GLB_PRFCNT_INSERT, ++insert_index);
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK;
+ }
+
+ /* When the ringbuffer reaches 50% capacity toggle PRFCNT_THRESHOLD */
+ if (!prev_threshold && (insert_index - extract_index >= (ring_size / 2)))
+ ack ^= GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK;
+ }
+
+ /* Update GLB_ACK */
+ output_page_write(iface->output, GLB_ACK, ack);
+}
+
+void kbase_csf_ring_doorbell(struct kbase_device *kbdev, int doorbell_nr)
+{
+ WARN_ON(doorbell_nr < 0);
+ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
+
+ if (WARN_ON(!kbdev))
+ return;
+
+ if (doorbell_nr == CSF_KERNEL_DOORBELL_NR) {
+ csf_doorbell_prfcnt(kbdev);
+ gpu_model_glb_request_job_irq(kbdev->model);
+ }
+}
+EXPORT_SYMBOL(kbase_csf_ring_doorbell);
 
 /**
 * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
@@ -560,6 +639,8 @@
 dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete",
 req_mask);
 err = -ETIMEDOUT;
+
+
 }
 
 return err;
@@ -621,16 +702,94 @@
 set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
 }
 
+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+ kbdev->csf.gpu_idle_dur_count);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+ GLB_REQ_IDLE_ENABLE_MASK);
+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+ kbdev->csf.gpu_idle_dur_count);
+}
+
+static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ bool complete = false;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+
+ if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) ==
+ (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask))
+ complete = true;
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return complete;
+}
+
+static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface,
+ u32 const req_mask)
+{
+ u32 glb_debug_req;
+
+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
+
+ glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK);
+ glb_debug_req ^= req_mask;
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask);
+}
+
+static void request_fw_core_dump(
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP);
+
+ set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode);
+
+ set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK);
+}
+
+int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ unsigned long flags;
+ int ret;
+
+ /* Serialize CORE_DUMP requests. */
+ mutex_lock(&kbdev->csf.reg_lock);
+
+ /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ request_fw_core_dump(global_iface);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Wait for firmware to acknowledge completion of the CORE_DUMP request. */
+ ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK);
+ if (!ret)
+ WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK));
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ return ret;
+}
+
 static void global_init(struct kbase_device *const kbdev, u64 core_mask)
 {
- u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
- GLB_ACK_IRQ_MASK_PING_MASK |
- GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
- GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
- GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
- GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
- GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+ u32 const ack_irq_mask =
+ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK;
 
 const struct kbase_csf_global_iface *const global_iface =
 &kbdev->csf.global_iface;
@@ -643,6 +802,12 @@
 enable_shader_poweroff_timer(kbdev, global_iface);
 
 set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
+
+ /* The GPU idle timer is always enabled for simplicity. Checks will be
+ * done before scheduling the GPU idle worker to see if it is
+ * appropriate for the current power policy.
+ */
+ enable_gpu_idle_timer(kbdev);
 
 /* Unmask the interrupts */
 kbase_csf_firmware_global_input(global_iface,
@@ -786,8 +951,9 @@
 dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!");
 spin_unlock(&kbdev->pm.clk_rtm.lock);
 
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter format with firmware idle hysteresis!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
 }
 
 /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */
@@ -811,7 +977,14 @@
 
 u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
 {
- return kbdev->csf.gpu_idle_hysteresis_ms;
+ unsigned long flags;
+ u32 dur;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ dur = kbdev->csf.gpu_idle_hysteresis_us;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return dur;
 }
 
 u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -819,11 +992,53 @@
 unsigned long flags;
 const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
 
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_ms = dur;
- kbdev->csf.gpu_idle_dur_count = hysteresis_val;
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* The 'fw_load_lock' is taken to synchronize against the deferred
+ * loading of FW, where the idle timer will be enabled.
+ */
+ mutex_lock(&kbdev->fw_load_lock);
+ if (unlikely(!kbdev->csf.firmware_inited)) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ mutex_unlock(&kbdev->fw_load_lock);
+ goto end;
+ }
+ mutex_unlock(&kbdev->fw_load_lock);
 
+ kbase_csf_scheduler_pm_active(kbdev);
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_err(kbdev->dev,
+ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+ kbase_csf_scheduler_pm_idle(kbdev);
+ return kbdev->csf.gpu_idle_dur_count;
+ }
+
+ /* The 'reg_lock' is also taken and is held till the update is not
+ * complete, to ensure the update of idle timer value by multiple Users
+ * gets serialized.
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+ /* The firmware only reads the new idle timer value when the timer is
+ * disabled.
+ */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Ensure that the request has taken effect */
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
 dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
 hysteresis_val);
 
@@ -832,7 +1047,6 @@
 
 static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
 {
-#define PWROFF_VAL_UNIT_SHIFT (10)
 /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
 u64 freq = arch_timer_get_cntfrq();
 u64 dur_val = dur_us;
@@ -848,8 +1062,9 @@
 dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
 spin_unlock(&kbdev->pm.clk_rtm.lock);
 
- dev_info(kbdev->dev, "Can't get the timestamp frequency, "
- "use cycle counter with MCU Core Poweroff timer!");
+ dev_info(
+ kbdev->dev,
+ "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!");
 }
 
 /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */
@@ -873,7 +1088,14 @@
 
 u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
 {
- return kbdev->csf.mcu_core_pwroff_dur_us;
+ u32 pwroff;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return pwroff;
 }
 
 u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
@@ -886,7 +1108,7 @@
 kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
- dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff);
+ dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
 
 return pwroff;
 }
@@ -895,11 +1117,14 @@
 {
 init_waitqueue_head(&kbdev->csf.event_wait);
 kbdev->csf.interrupt_received = false;
- kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS;
+
+ kbdev->csf.fw_timeout_ms =
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
 
 INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
 INIT_LIST_HEAD(&kbdev->csf.firmware_config);
 INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
 INIT_WORK(&kbdev->csf.firmware_reload_work,
 kbase_csf_firmware_reload_worker);
 INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -909,7 +1134,26 @@
 return 0;
 }
 
-int kbase_csf_firmware_init(struct kbase_device *kbdev)
+void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
+{
+ mutex_destroy(&kbdev->csf.reg_lock);
+}
+
+int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
+{
+ kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
+ kbdev->csf.gpu_idle_dur_count =
+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
+
+ return 0;
+}
+
+int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
 {
 int ret;
 
@@ -927,10 +1171,6 @@
 kbdev->as_free |= MCU_AS_BITMASK;
 return ret;
 }
-
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
- kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
 
 ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
 if (ret != 0) {
@@ -979,19 +1219,17 @@
 return 0;
 
 error:
- kbase_csf_firmware_term(kbdev);
+ kbase_csf_firmware_unload_term(kbdev);
 return ret;
 }
 
-void kbase_csf_firmware_term(struct kbase_device *kbdev)
+void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
 {
 cancel_work_sync(&kbdev->csf.fw_error_work);
 
 kbase_csf_timeout_term(kbdev);
 
 /* NO_MALI: Don't stop firmware or unload MMU tables */
-
- kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
 
 kbase_csf_scheduler_term(kbdev);
 
@@ -1018,44 +1256,30 @@
 
 /* NO_MALI: No trace buffers to terminate */
 
-#ifndef MALI_KBASE_BUILD
- mali_kutf_fw_utf_entry_cleanup(kbdev);
-#endif
-
- mutex_destroy(&kbdev->csf.reg_lock);
-
 /* This will also free up the region allocated for the shared interface
 * entry parsed from the firmware image.
 */
 kbase_mcu_shared_interface_region_tracker_term(kbdev);
+
+ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
 }
 
 void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
 {
 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- u32 glb_req;
+ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
 
 kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
 /* The scheduler is assumed to only call the enable when its internal
 * state indicates that the idle timer has previously been disabled. So
 * on entry the expected field values are:
 * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
 * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
 */
-
- glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
 if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
 dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
 
- kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
- kbdev->csf.gpu_idle_dur_count);
-
- kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
- GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
- dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
- kbdev->csf.gpu_idle_dur_count);
+ enable_gpu_idle_timer(kbdev);
 kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 }
 
@@ -1086,8 +1310,9 @@
 kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
 
-int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
+int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
 {
+ CSTD_UNUSED(wait_timeout_ms);
 kbase_csf_firmware_ping(kbdev);
 return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
 }
@@ -1119,15 +1344,23 @@
 void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
 {
 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- unsigned long flags;
 
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
 set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
 dev_dbg(kbdev->dev, "Sending request to enter protected mode");
 kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
 
- wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
+{
+ int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+
+ if (err) {
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
+ }
+
+ return err;
 }
 
 void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -1136,11 +1369,46 @@
 unsigned long flags;
 
 kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ /* Validate there are no on-slot groups when sending the
+ * halt request to firmware.
+ */
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev));
 set_global_request(global_iface, GLB_REQ_HALT_MASK);
 dev_dbg(kbdev->dev, "Sending request to HALT MCU");
 kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
 kbase_csf_scheduler_spin_unlock(kbdev, flags);
 }
+
+void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
+{
+ /* Trigger the boot of MCU firmware, Use the AUTO mode as
+ * otherwise on fast reset, to exit protected mode, MCU will
+ * not reboot by itself to enter normal mode.
+ */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO);
+}
+
+#ifdef KBASE_PM_RUNTIME
+void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ set_global_request(global_iface, GLB_REQ_SLEEP_MASK);
+ dev_dbg(kbdev->dev, "Sending sleep request to MCU");
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return (global_request_complete(kbdev, GLB_REQ_SLEEP_MASK) &&
+ kbase_csf_firmware_mcu_halted(kbdev));
+}
+#endif
 
 int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
 {
@@ -1256,6 +1524,11 @@
 return NULL;
 }
 
+void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
+{
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE);
+}
+
 void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev)
 {
 /* NO_MALI: Nothing to do here */
@@ -1286,7 +1559,7 @@
 gpu_map_prot =
 KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
 cpu_map_prot = pgprot_writecombine(cpu_map_prot);
- };
+ }
 
 phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
 if (!phys)
@@ -1296,9 +1569,8 @@
 if (!page_list)
 goto page_list_alloc_error;
 
- ret = kbase_mem_pool_alloc_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+ phys, false, NULL);
 if (ret <= 0)
 goto phys_mem_pool_alloc_error;
 
@@ -1309,8 +1581,8 @@
 if (!cpu_addr)
 goto vmap_error;
 
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
 if (!va_reg)
 goto va_region_alloc_error;
 
@@ -1324,9 +1596,9 @@
 gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR);
 gpu_map_properties |= gpu_map_prot;
 
- ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
- va_reg->start_pfn, &phys[0], num_pages,
- gpu_map_properties, KBASE_MEM_GROUP_CSF_FW);
+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
+ &phys[0], num_pages, gpu_map_properties,
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
 if (ret)
 goto mmu_insert_pages_error;
 
@@ -1340,7 +1612,7 @@
 
 mmu_insert_pages_error:
 mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(va_reg);
+ kbase_remove_va_region(kbdev, va_reg);
 va_region_add_error:
 kbase_free_alloced_region(va_reg);
 mutex_unlock(&kbdev->csf.reg_lock);
@@ -1372,7 +1644,7 @@
 {
 if (csf_mapping->va_reg) {
 mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(csf_mapping->va_reg);
+ kbase_remove_va_region(kbdev, csf_mapping->va_reg);
 kbase_free_alloced_region(csf_mapping->va_reg);
 mutex_unlock(&kbdev->csf.reg_lock);
 }