From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 243 ++++++++++++++++++++++++++++--------------------
1 files changed, 142 insertions(+), 101 deletions(-)
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 985bebd..dadeb20 100644
--- a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -30,6 +30,7 @@
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
static inline struct v9_mqd *get_mqd(void *mqd)
{
@@ -45,7 +46,7 @@
struct queue_properties *q)
{
struct v9_mqd *m;
- uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+ uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
if (q->cu_mask_count == 0)
return;
@@ -58,48 +59,84 @@
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
+ m->compute_static_thread_mgmt_se4 = se_mask[4];
+ m->compute_static_thread_mgmt_se5 = se_mask[5];
+ m->compute_static_thread_mgmt_se6 = se_mask[6];
+ m->compute_static_thread_mgmt_se7 = se_mask[7];
- pr_debug("update cu mask to %#x %#x %#x %#x\n",
+ pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
m->compute_static_thread_mgmt_se0,
m->compute_static_thread_mgmt_se1,
m->compute_static_thread_mgmt_se2,
- m->compute_static_thread_mgmt_se3);
+ m->compute_static_thread_mgmt_se3,
+ m->compute_static_thread_mgmt_se4,
+ m->compute_static_thread_mgmt_se5,
+ m->compute_static_thread_mgmt_se6,
+ m->compute_static_thread_mgmt_se7);
}
-static int init_mqd(struct mqd_manager *mm, void **mqd,
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
- struct queue_properties *q)
+static void set_priority(struct v9_mqd *m, struct queue_properties *q)
+{
+ m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+ m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+ struct queue_properties *q)
{
int retval;
- uint64_t addr;
- struct v9_mqd *m;
- struct kfd_dev *kfd = mm->dev;
+ struct kfd_mem_obj *mqd_mem_obj = NULL;
- *mqd_mem_obj = NULL;
- /* From V9, for CWSR, the control stack is located on the next page
- * boundary after the mqd, we will use the gtt allocation function
- * instead of sub-allocation function.
+ /* For V9 only, due to a HW bug, the control stack of a user mode
+ * compute queue needs to be allocated just behind the page boundary
+ * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
+ * the first page of the buffer serves as the regular MQD buffer
+ * purpose and the remaining is for control stack. Although the two
+ * parts are in the same buffer object, they need different memory
+ * types: MQD part needs UC (uncached) as usual, while control stack
+ * needs NC (non coherent), which is different from the UC type which
+ * is used when control stack is allocated in user space.
+ *
+ * Because of all those, we use the gtt allocation function instead
+ * of sub-allocation function for this enlarged MQD buffer. Moreover,
+ * in order to achieve two memory types in a single buffer object, we
+ * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
+ * amdgpu memory functions to do so.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
- *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
- if (!*mqd_mem_obj)
- return -ENOMEM;
- retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
+ mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ if (!mqd_mem_obj)
+ return NULL;
+ retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
- &((*mqd_mem_obj)->gtt_mem),
- &((*mqd_mem_obj)->gpu_addr),
- (void *)&((*mqd_mem_obj)->cpu_ptr), true);
- } else
- retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
- mqd_mem_obj);
- if (retval) {
- kfree(*mqd_mem_obj);
- return -ENOMEM;
+ &(mqd_mem_obj->gtt_mem),
+ &(mqd_mem_obj->gpu_addr),
+ (void *)&(mqd_mem_obj->cpu_ptr), true);
+
+ if (retval) {
+ kfree(mqd_mem_obj);
+ return NULL;
+ }
+ } else {
+ retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
+ &mqd_mem_obj);
+ if (retval)
+ return NULL;
}
- m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
- addr = (*mqd_mem_obj)->gpu_addr;
+ return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct v9_mqd *m;
+
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
memset(m, 0, sizeof(struct v9_mqd));
@@ -109,6 +146,10 @@
m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
@@ -120,10 +161,7 @@
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
- 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
-
- m->cp_hqd_pipe_priority = 1;
- m->cp_hqd_queue_priority = 15;
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
@@ -151,9 +189,7 @@
*mqd = m;
if (gart_addr)
*gart_addr = addr;
- retval = mm->update_mqd(mm, m, q);
-
- return retval;
+ mm->update_mqd(mm, m, q);
}
static int load_mqd(struct mqd_manager *mm, void *mqd,
@@ -168,7 +204,15 @@
wptr_shift, 0, mms);
}
-static int update_mqd(struct mqd_manager *mm, void *mqd,
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
struct v9_mqd *m;
@@ -227,13 +271,9 @@
m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, q);
+ set_priority(m, q);
- q->is_active = (q->queue_size > 0 &&
- q->queue_address != 0 &&
- q->queue_percent > 0 &&
- !q->is_evicted);
-
- return 0;
+ q->is_active = QUEUE_IS_ACTIVE(*q);
}
@@ -247,13 +287,13 @@
pipe_id, queue_id);
}
-static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+static void free_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
struct kfd_dev *kfd = mm->dev;
if (mqd_mem_obj->gtt_mem) {
- kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
} else {
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
@@ -269,71 +309,58 @@
pipe_id, queue_id);
}
-static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct v9_mqd *m;
+
+ /* Control stack is located one page after MQD. */
+ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+ m = get_mqd(mqd);
+
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+ m->cp_hqd_cntl_stack_offset;
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
+ m->cp_hqd_cntl_stack_size;
+
+ if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
{
struct v9_mqd *m;
- int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
- if (retval != 0)
- return retval;
+ init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
m = get_mqd(*mqd);
m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
-
- return retval;
}
-static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
-{
- struct v9_mqd *m;
- int retval = update_mqd(mm, mqd, q);
-
- if (retval != 0)
- return retval;
-
- /* TODO: what's the point? update_mqd already does this. */
- m = get_mqd(mqd);
- m->cp_hqd_vmid = q->vmid;
- return retval;
-}
-
-static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
{
- int retval;
struct v9_sdma_mqd *m;
-
- retval = kfd_gtt_sa_allocate(mm->dev,
- sizeof(struct v9_sdma_mqd),
- mqd_mem_obj);
-
- if (retval != 0)
- return -ENOMEM;
-
- m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
memset(m, 0, sizeof(struct v9_sdma_mqd));
*mqd = m;
if (gart_addr)
- *gart_addr = (*mqd_mem_obj)->gpu_addr;
+ *gart_addr = mqd_mem_obj->gpu_addr;
- retval = mm->update_mqd(mm, m, q);
-
- return retval;
-}
-
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ mm->update_mqd(mm, m, q);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -347,7 +374,7 @@
#define SDMA_RLC_DUMMY_DEFAULT 0xf
-static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
struct v9_sdma_mqd *m;
@@ -370,12 +397,7 @@
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
- q->is_active = (q->queue_size > 0 &&
- q->queue_address != 0 &&
- q->queue_percent > 0 &&
- !q->is_evicted);
-
- return 0;
+ q->is_active = QUEUE_IS_ACTIVE(*q);
}
/*
@@ -431,35 +453,54 @@
switch (type) {
case KFD_MQD_TYPE_CP:
- case KFD_MQD_TYPE_COMPUTE:
+ mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->uninit_mqd = uninit_mqd;
+ mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->get_wave_state = get_wave_state;
+ mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
+ mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->uninit_mqd = uninit_mqd;
- mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd_hiq;
+ mqd->free_mqd = free_mqd_hiq_sdma;
+ mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct v9_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = free_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
+ mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
- mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
--
Gitblit v1.6.2