From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 384 ++++++++++++++++++++++++++++++++++++------------------
1 file changed, 252 insertions(+), 132 deletions(-)
diff --git a/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index df69657..2c1c5f7 100644
--- a/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -22,14 +22,33 @@
* Authors: monk liu <monk.liu@amd.com>
*/
-#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
+#include <linux/nospec.h>
+
+#define to_amdgpu_ctx_entity(e) \
+ container_of((e), struct amdgpu_ctx_entity, entity)
+
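+/* number of scheduler entities (user-visible rings) created per HW IP type */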
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = 1,
+ [AMDGPU_HW_IP_COMPUTE] = 4,
+ [AMDGPU_HW_IP_DMA] = 2,
+ [AMDGPU_HW_IP_UVD] = 1,
+ [AMDGPU_HW_IP_VCE] = 1,
+ [AMDGPU_HW_IP_UVD_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_DEC] = 1,
+ [AMDGPU_HW_IP_VCN_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_JPEG] = 1,
+};
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -43,36 +62,100 @@
return -EACCES;
}
+static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
+{
+ switch (prio) {
+ case DRM_SCHED_PRIORITY_HIGH:
+ case DRM_SCHED_PRIORITY_KERNEL:
+ return AMDGPU_GFX_PIPE_PRIO_HIGH;
+ default:
+ return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ }
+}
+
+static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
+ enum drm_sched_priority prio,
+ u32 hw_ip)
+{
+ unsigned int hw_prio;
+
+ hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
+ amdgpu_ctx_sched_prio_to_compute_prio(prio) :
+ AMDGPU_RING_PRIO_DEFAULT;
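+ /* hw_ip comes from userspace; sanitize the index against speculative out-of-bounds access */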
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+ if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+ return hw_prio;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+ const u32 ring)
+{
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ unsigned num_scheds = 0;
+ unsigned int hw_prio;
+ enum drm_sched_priority priority;
+ int r;
+
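+ /* allocate the entity together with its trailing fence ring of amdgpu_sched_jobs slots */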
+ entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
+
+ entity->sequence = 1;
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+ /* disable load balance if the hw engine retains context among dependent jobs */
+ if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+ hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+ hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+ hw_ip == AMDGPU_HW_IP_UVD) {
+ sched = drm_sched_pick_best(scheds, num_scheds);
+ scheds = &sched;
+ num_scheds = 1;
+ }
+
+ r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+ &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
+ ctx->entities[hw_ip][ring] = entity;
+ return 0;
+
+error_free_entity:
+ kfree(entity);
+
+ return r;
+}
+
static int amdgpu_ctx_init(struct amdgpu_device *adev,
enum drm_sched_priority priority,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
- unsigned i, j;
int r;
-
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
r = amdgpu_ctx_priority_permit(filp, priority);
if (r)
return r;
memset(ctx, 0, sizeof(*ctx));
+
ctx->adev = adev;
+
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
- ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!ctx->fences)
- return -ENOMEM;
-
mutex_init(&ctx->lock);
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- ctx->rings[i].sequence = 1;
- ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
- }
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -80,34 +163,21 @@
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
- /* create context entity for each ring */
- for (i = 0; i < adev->num_rings; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
- struct drm_sched_rq *rq;
-
- rq = &ring->sched.sched_rq[priority];
-
- if (ring == &adev->gfx.kiq.ring)
- continue;
-
- r = drm_sched_entity_init(&ctx->rings[i].entity,
- &rq, 1, &ctx->guilty);
- if (r)
- goto failed;
- }
-
- r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
- if (r)
- goto failed;
-
return 0;
+}
-failed:
- for (j = 0; j < i; j++)
- drm_sched_entity_destroy(&ctx->rings[j].entity);
- kfree(ctx->fences);
- ctx->fences = NULL;
- return r;
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+ int i;
+
+ if (!entity)
+ return;
+
+ for (i = 0; i < amdgpu_sched_jobs; ++i)
+ dma_fence_put(entity->fences[i]);
+
+ kfree(entity);
}
static void amdgpu_ctx_fini(struct kref *ref)
@@ -119,17 +189,46 @@
if (!adev)
return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(ctx->rings[i].fences[j]);
- kfree(ctx->fences);
- ctx->fences = NULL;
-
- amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+ amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+ ctx->entities[i][j] = NULL;
+ }
+ }
mutex_destroy(&ctx->lock);
-
kfree(ctx);
+}
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity)
+{
+ int r;
+
+ if (hw_ip >= AMDGPU_HW_IP_NUM) {
+ DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+ return -EINVAL;
+ }
+
+ /* Right now all IPs have only one instance - multiple rings. */
+ if (instance != 0) {
+ DRM_DEBUG("invalid ip instance: %d\n", instance);
+ return -EINVAL;
+ }
+
+ if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+ DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+ return -EINVAL;
+ }
+
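+ /* entities are created lazily, on first use of a given hw_ip/ring pair */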
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
+ return 0;
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -147,7 +246,7 @@
return -ENOMEM;
mutex_lock(&mgr->lock);
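+ /* cap the number of context handles per file at AMDGPU_VM_MAX_NUM_CTX */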
- r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+ r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
if (r < 0) {
mutex_unlock(&mgr->lock);
kfree(ctx);
@@ -168,16 +267,16 @@
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- for (i = 0; i < ctx->adev->num_rings; i++) {
-
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- drm_sched_entity_destroy(&ctx->rings[i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
}
amdgpu_ctx_fini(ref);
@@ -274,16 +373,15 @@
enum drm_sched_priority priority;
union drm_amdgpu_ctx *args = data;
- struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
- r = 0;
id = args->in.ctx_id;
- priority = amdgpu_to_sched_priority(args->in.priority);
+ r = amdgpu_to_sched_priority(args->in.priority, &priority);
/* For backwards compatibility reasons, we need to accept
* ioctls with garbage in the priority field */
- if (priority == DRM_SCHED_PRIORITY_INVALID)
+ if (r == -EINVAL)
priority = DRM_SCHED_PRIORITY_NORMAL;
switch (args->in.op) {
@@ -334,106 +432,129 @@
return 0;
}
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence, uint64_t* handle)
{
- struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
- uint64_t seq = cring->sequence;
- unsigned idx = 0;
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ uint64_t seq = centity->sequence;
struct dma_fence *other = NULL;
+ unsigned idx = 0;
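+ /* fences live in a ring of amdgpu_sched_jobs slots indexed by sequence number */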
idx = seq & (amdgpu_sched_jobs - 1);
- other = cring->fences[idx];
+ other = centity->fences[idx];
if (other)
BUG_ON(!dma_fence_is_signaled(other));
dma_fence_get(fence);
spin_lock(&ctx->ring_lock);
- cring->fences[idx] = fence;
- cring->sequence++;
+ centity->fences[idx] = fence;
+ centity->sequence++;
spin_unlock(&ctx->ring_lock);
dma_fence_put(other);
- if (handler)
- *handler = seq;
-
- return 0;
+ if (handle)
+ *handle = seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
- struct amdgpu_ring *ring, uint64_t seq)
+ struct drm_sched_entity *entity,
+ uint64_t seq)
{
- struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
struct dma_fence *fence;
spin_lock(&ctx->ring_lock);
if (seq == ~0ull)
- seq = ctx->rings[ring->idx].sequence - 1;
+ seq = centity->sequence - 1;
- if (seq >= cring->sequence) {
+ if (seq >= centity->sequence) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
- if (seq + amdgpu_sched_jobs < cring->sequence) {
+ if (seq + amdgpu_sched_jobs < centity->sequence) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
- fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+ fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
spin_unlock(&ctx->ring_lock);
return fence;
}
+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *aentity,
+ int hw_ip,
+ enum drm_sched_priority priority)
+{
+ struct amdgpu_device *adev = ctx->adev;
+ unsigned int hw_prio;
+ struct drm_gpu_scheduler **scheds = NULL;
+ unsigned num_scheds;
+
+ /* set sw priority */
+ drm_sched_entity_set_priority(&aentity->entity, priority);
+
+ /* set hw priority */
+ if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+ hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
+ AMDGPU_HW_IP_COMPUTE);
+ hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ drm_sched_entity_modify_sched(&aentity->entity, scheds,
+ num_scheds);
+ }
+}
+
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- int i;
- struct amdgpu_device *adev = ctx->adev;
- struct drm_sched_rq *rq;
- struct drm_sched_entity *entity;
- struct amdgpu_ring *ring;
enum drm_sched_priority ctx_prio;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- for (i = 0; i < adev->num_rings; i++) {
- ring = adev->rings[i];
- entity = &ctx->rings[i].entity;
- rq = &ring->sched.sched_rq[ctx_prio];
-
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- continue;
-
- drm_sched_entity_set_rq(entity, rq);
+ amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+ i, ctx_prio);
+ }
}
}
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity)
{
- struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
- unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
- struct dma_fence *other = cring->fences[idx];
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ struct dma_fence *other;
+ unsigned idx;
+ long r;
- if (other) {
- signed long r;
- r = dma_fence_wait(other, true);
- if (r < 0) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("Error (%ld) waiting for fence!\n", r);
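+ /* take a reference under the ring lock so the fence cannot be freed while we wait */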
+ spin_lock(&ctx->ring_lock);
+ idx = centity->sequence & (amdgpu_sched_jobs - 1);
+ other = dma_fence_get(centity->fences[idx]);
+ spin_unlock(&ctx->ring_lock);
- return r;
- }
- }
+ if (!other)
+ return 0;
- return 0;
+ r = dma_fence_wait(other, true);
+ if (r < 0 && r != -ERESTARTSYS)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+ dma_fence_put(other);
+ return r;
}
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
@@ -442,57 +563,56 @@
idr_init(&mgr->ctx_handles);
}
-void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
- long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- if (!ctx->adev) {
- mutex_unlock(&mgr->lock);
- return;
- }
+ if (!ctx->entities[i][j])
+ continue;
- for (i = 0; i < ctx->adev->num_rings; i++) {
-
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
- max_wait);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
+ return timeout;
}
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
+ if (kref_read(&ctx->refcount) != 1) {
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ continue;
+ }
- if (!ctx->adev)
- return;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < ctx->adev->num_rings; i++) {
+ if (!ctx->entities[i][j])
+ continue;
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- if (kref_read(&ctx->refcount) == 1)
- drm_sched_entity_fini(&ctx->rings[i].entity);
- else
- DRM_ERROR("ctx %p is still alive\n", ctx);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
}
}
}
--
Gitblit v1.6.2