From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/drivers/rknpu/rknpu_job.c | 311 ++++++++++++++++++++++++++++++++------------------- 1 files changed, 197 insertions(+), 114 deletions(-) diff --git a/kernel/drivers/rknpu/rknpu_job.c b/kernel/drivers/rknpu/rknpu_job.c index 6a167c4..6dc94b5 100644 --- a/kernel/drivers/rknpu/rknpu_job.c +++ b/kernel/drivers/rknpu/rknpu_job.c @@ -23,16 +23,25 @@ #define REG_READ(offset) _REG_READ(rknpu_core_base, offset) #define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset) -static int rknpu_core_index(int core_mask) +static int rknpu_wait_core_index(int core_mask) { int index = 0; - if (core_mask & RKNPU_CORE0_MASK) + switch (core_mask) { + case RKNPU_CORE0_MASK: + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: index = 0; - else if (core_mask & RKNPU_CORE1_MASK) + break; + case RKNPU_CORE1_MASK: index = 1; - else if (core_mask & RKNPU_CORE2_MASK) + break; + case RKNPU_CORE2_MASK: index = 2; + break; + default: + break; + } return index; } @@ -58,14 +67,24 @@ return core_mask; } -static int rknn_get_task_number(struct rknpu_job *job, int core_index) +static int rknpu_get_task_number(struct rknpu_job *job, int core_index) { + struct rknpu_device *rknpu_dev = job->rknpu_dev; int task_num = job->args->task_number; - if (job->use_core_num == 2) - task_num = job->args->subcore_task[core_index].task_number; - else if (job->use_core_num == 3) - task_num = job->args->subcore_task[core_index + 2].task_number; + if (core_index >= RKNPU_MAX_CORES || core_index < 0) { + LOG_ERROR("invalid rknpu core index: %d", core_index); + return 0; + } + + if (rknpu_dev->config->num_irqs > 1) { + if (job->use_core_num == 1 || job->use_core_num == 2) + task_num = + job->args->subcore_task[core_index].task_number; + else if (job->use_core_num == 3) + task_num = job->args->subcore_task[core_index + 2] + .task_number; + } return task_num; } @@ -112,8 +131,6 @@ #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM struct rknpu_gem_object *task_obj = NULL; #endif - if (rknpu_dev->config->num_irqs == 1) - args->core_mask = RKNPU_CORE0_MASK; job = kzalloc(sizeof(*job), GFP_KERNEL); if (!job) @@ -157,10 +174,12 @@ struct rknpu_submit *args = job->args; struct rknpu_task *last_task = NULL; struct rknpu_subcore_data *subcore_data = NULL; + struct rknpu_job *entry, *q; void __iomem *rknpu_core_base = NULL; - int core_index = rknpu_core_index(job->args->core_mask); + int core_index = rknpu_wait_core_index(job->args->core_mask); unsigned long flags; int wait_count = 0; + bool continue_wait = false; int ret = -EINVAL; int i = 0; @@ -171,31 +190,47 @@ job->flags & RKNPU_JOB_DONE || rknpu_dev->soft_reseting, msecs_to_jiffies(args->timeout)); + if (++wait_count >= 3) break; - } while (ret == 0 && job->in_queue[core_index]); - if (job->in_queue[core_index]) { - spin_lock_irqsave(&rknpu_dev->lock, flags); - subcore_data->task_num -= rknn_get_task_number(job, core_index); - if (job->use_core_num == 1) { - list_del_init(&job->head[core_index]); - job->in_queue[core_index] = false; - } else if (job->use_core_num > 1) { - for (i = 0; i < job->use_core_num; i++) { - if (job->in_queue[i]) { - list_del_init(&job->head[i]); - job->in_queue[i] = false; + if (ret == 0) { + int64_t elapse_time_us = 0; + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + elapse_time_us = ktime_us_delta(ktime_get(), + 
job->hw_commit_time); + continue_wait = + job->hw_commit_time == 0 ? + true : + (elapse_time_us < args->timeout * 1000); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + LOG_ERROR( + "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", + job, wait_count, continue_wait, + (job->hw_commit_time == 0 ? 0 : elapse_time_us), + ktime_us_delta(ktime_get(), job->timestamp), + args->timeout * 1000); + } + } while (ret == 0 && continue_wait); + + last_task = job->last_task; + if (!last_task) { + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + for (i = 0; i < job->use_core_num; i++) { + subcore_data = &rknpu_dev->subcore_datas[i]; + list_for_each_entry_safe( + entry, q, &subcore_data->todo_list, head[i]) { + if (entry == job) { + list_del(&job->head[i]); + break; } } } - spin_unlock_irqrestore(&rknpu_dev->lock, flags); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + LOG_ERROR("job commit failed\n"); return ret < 0 ? ret : -EINVAL; } - - last_task = job->last_task; - if (!last_task) - return ret < 0 ? ret : -EINVAL; last_task->int_status = job->int_status[core_index]; @@ -213,7 +248,7 @@ LOG_ERROR( "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n", args->task_counter, args->flags, ret, - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + ktime_us_delta(ktime_get(), job->timestamp)); return ret < 0 ? ret : -ETIMEDOUT; } @@ -222,11 +257,13 @@ return -EINVAL; args->task_counter = args->task_number; + args->hw_elapse_time = job->hw_elapse_time; return 0; } -static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) +static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job, + int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_submit *args = job->args; @@ -243,15 +280,20 @@ struct rknpu_task *last_task = NULL; void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; int task_start = args->task_start; - int task_end = args->task_start + args->task_number - 1; + int task_end; int task_number = args->task_number; int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 
1 : 0; int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale; int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits; int i = 0; + int submit_index = atomic_read(&job->submit_count[core_index]); + int max_submit_number = rknpu_dev->config->max_submit_number; + unsigned long flags; - if (!task_obj) - return -EINVAL; + if (!task_obj) { + job->ret = -EINVAL; + return job->ret; + } if (rknpu_dev->config->num_irqs > 1) { for (i = 0; i < rknpu_dev->config->num_irqs; i++) { @@ -261,38 +303,44 @@ } } - if (job->use_core_num == 1) { + switch (job->use_core_num) { + case 1: + case 2: task_start = args->subcore_task[core_index].task_start; - task_end = args->subcore_task[core_index].task_start + - args->subcore_task[core_index].task_number - - 1; task_number = args->subcore_task[core_index].task_number; - } else if (job->use_core_num == 2) { - task_start = args->subcore_task[core_index].task_start; - task_end = args->subcore_task[core_index].task_start + - args->subcore_task[core_index].task_number - - 1; - task_number = - args->subcore_task[core_index].task_number; - } else if (job->use_core_num == 3) { + break; + case 3: task_start = args->subcore_task[core_index + 2].task_start; - task_end = - args->subcore_task[core_index + 2].task_start + - args->subcore_task[core_index + 2].task_number - - 1; task_number = args->subcore_task[core_index + 2].task_number; + break; + default: + LOG_ERROR("Unknown use core num %d\n", + job->use_core_num); + break; } } + + task_start = task_start + submit_index * max_submit_number; + task_number = task_number - submit_index * max_submit_number; + task_number = task_number > max_submit_number ? max_submit_number : + task_number; + task_end = task_start + task_number - 1; task_base = task_obj->kv_addr; first_task = &task_base[task_start]; last_task = &task_base[task_end]; - REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); + if (rknpu_dev->config->pc_dma_ctrl) { + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + } else { + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); + } REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + pc_data_amount_scale - 1) / @@ -319,19 +367,56 @@ return 0; } -static int rknpu_job_commit(struct rknpu_job *job, int core_index) +static inline int rknpu_job_subcore_commit(struct rknpu_job *job, + int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_submit *args = job->args; void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; + unsigned long flags; // switch to slave mode - REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); + if (rknpu_dev->config->pc_dma_ctrl) { + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + } else { + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); + } - if (!(args->flags & RKNPU_JOB_PC)) - return -EINVAL; + if (!(args->flags & RKNPU_JOB_PC)) { + job->ret = -EINVAL; + return job->ret; + } - return rknpu_job_commit_pc(job, core_index); + return rknpu_job_subcore_commit_pc(job, core_index); +} + +static void rknpu_job_commit(struct rknpu_job *job) +{ + switch (job->args->core_mask) { + case RKNPU_CORE0_MASK: + rknpu_job_subcore_commit(job, 0); + break; + case RKNPU_CORE1_MASK: + rknpu_job_subcore_commit(job, 1); + break; + case RKNPU_CORE2_MASK: + rknpu_job_subcore_commit(job, 2); + break; + case RKNPU_CORE0_MASK | 
RKNPU_CORE1_MASK: + rknpu_job_subcore_commit(job, 0); + rknpu_job_subcore_commit(job, 1); + break; + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: + rknpu_job_subcore_commit(job, 0); + rknpu_job_subcore_commit(job, 1); + rknpu_job_subcore_commit(job, 2); + break; + default: + LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask); + break; + } } static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) @@ -356,18 +441,13 @@ head[core_index]); list_del_init(&job->head[core_index]); - job->in_queue[core_index] = false; subcore_data->job = job; - job->hw_recoder_time = ktime_get(); + job->hw_commit_time = ktime_get(); + job->hw_recoder_time = job->hw_commit_time; spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (atomic_dec_and_test(&job->run_count)) { - if (job->args->core_mask & RKNPU_CORE0_MASK) - job->ret = rknpu_job_commit(job, 0); - if (job->args->core_mask & RKNPU_CORE1_MASK) - job->ret = rknpu_job_commit(job, 1); - if (job->args->core_mask & RKNPU_CORE2_MASK) - job->ret = rknpu_job_commit(job, 2); + rknpu_job_commit(job); } } @@ -375,16 +455,25 @@ { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; + ktime_t now; unsigned long flags; - ktime_t now = ktime_get(); + int max_submit_number = rknpu_dev->config->max_submit_number; + + if (atomic_inc_return(&job->submit_count[core_index]) < + (rknpu_get_task_number(job, core_index) + max_submit_number - 1) / + max_submit_number) { + rknpu_job_subcore_commit(job, core_index); + return; + } subcore_data = &rknpu_dev->subcore_datas[core_index]; spin_lock_irqsave(&rknpu_dev->irq_lock, flags); subcore_data->job = NULL; - subcore_data->task_num -= rknn_get_task_number(job, core_index); - subcore_data->timer.busy_time += - ktime_us_delta(now, job->hw_recoder_time); + subcore_data->task_num -= rknpu_get_task_number(job, core_index); + now = ktime_get(); + job->hw_elapse_time = ktime_sub(now, job->hw_commit_time); + subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (atomic_dec_and_test(&job->interrupt_count)) { @@ -408,43 +497,32 @@ rknpu_job_next(rknpu_dev, core_index); } +static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev) +{ + int core_num = rknpu_dev->config->num_irqs; + int task_num = rknpu_dev->subcore_datas[0].task_num; + int core_index = 0; + int i = 0; + + for (i = 1; i < core_num; i++) { + if (task_num > rknpu_dev->subcore_datas[i].task_num) { + core_index = i; + task_num = rknpu_dev->subcore_datas[i].task_num; + } + } + + return core_index; +} + static void rknpu_job_schedule(struct rknpu_job *job) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; int i = 0, core_index = 0; unsigned long flags; - int task_num_list[3] = { 0, 1, 2 }; - int tmp = 0; - if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) { - if (rknpu_dev->subcore_datas[0].task_num > - rknpu_dev->subcore_datas[1].task_num) { - tmp = task_num_list[1]; - task_num_list[1] = task_num_list[0]; - task_num_list[0] = tmp; - } - if (rknpu_dev->subcore_datas[task_num_list[0]].task_num > - rknpu_dev->subcore_datas[2].task_num) { - tmp = task_num_list[2]; - task_num_list[2] = task_num_list[1]; - task_num_list[1] = task_num_list[0]; - task_num_list[0] = tmp; - } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num > - rknpu_dev->subcore_datas[2].task_num) { - tmp = task_num_list[2]; - task_num_list[2] = task_num_list[1]; - task_num_list[1] = tmp; - 
} - if (!rknpu_dev->subcore_datas[task_num_list[0]].job) - core_index = task_num_list[0]; - else if (!rknpu_dev->subcore_datas[task_num_list[1]].job) - core_index = task_num_list[1]; - else if (!rknpu_dev->subcore_datas[task_num_list[2]].job) - core_index = task_num_list[2]; - else - core_index = task_num_list[0]; - + if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) { + core_index = rknpu_schedule_core_index(rknpu_dev); job->args->core_mask = rknpu_core_mask(core_index); job->use_core_num = 1; atomic_set(&job->run_count, job->use_core_num); @@ -456,8 +534,7 @@ if (job->args->core_mask & rknpu_core_mask(i)) { subcore_data = &rknpu_dev->subcore_datas[i]; list_add_tail(&job->head[i], &subcore_data->todo_list); - subcore_data->task_num += rknn_get_task_number(job, i); - job->in_queue[i] = true; + subcore_data->task_num += rknpu_get_task_number(job, i); } } spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); @@ -477,18 +554,18 @@ msleep(100); + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) { subcore_data = &rknpu_dev->subcore_datas[i]; - spin_lock_irqsave(&rknpu_dev->irq_lock, flags); if (job == subcore_data->job && !job->irq_entry[i]) { subcore_data->job = NULL; subcore_data->task_num -= - rknn_get_task_number(job, i); + rknpu_get_task_number(job, i); } - spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } } + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (job->ret == -ETIMEDOUT) { LOG_ERROR("job timeout, flags: %#x:\n", job->flags); @@ -505,8 +582,8 @@ rknpu_dev->config ->pc_task_status_offset) & rknpu_dev->config->pc_task_number_mask), - ktime_to_us(ktime_sub(ktime_get(), - job->timestamp))); + ktime_us_delta(ktime_get(), + job->timestamp)); } } rknpu_soft_reset(rknpu_dev); @@ -514,7 +591,7 @@ LOG_ERROR( "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n", job->flags, job->ret, - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + ktime_us_delta(ktime_get(), job->timestamp)); } rknpu_job_cleanup(job); @@ -609,7 +686,6 @@ { struct rknpu_job *job = NULL; unsigned long flags; - ktime_t now = ktime_get(); struct rknpu_subcore_data *subcore_data = NULL; int i = 0; @@ -618,7 +694,7 @@ subcore_data = &rknpu_dev->subcore_datas[i]; job = subcore_data->job; if (job && - ktime_to_ms(ktime_sub(now, job->timestamp)) >= + ktime_us_delta(ktime_get(), job->timestamp) >= job->args->timeout) { rknpu_soft_reset(rknpu_dev); @@ -640,7 +716,6 @@ struct rknpu_job, head[i]); list_del_init(&job->head[i]); - job->in_queue[i] = false; } else { job = NULL; } @@ -661,6 +736,11 @@ if (args->task_number == 0) { LOG_ERROR("invalid rknpu task number!\n"); + return -EINVAL; + } + + if (args->core_mask > rknpu_dev->config->core_mask) { + LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask); return -EINVAL; } @@ -861,27 +941,30 @@ int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev) { void __iomem *rknpu_core_base = rknpu_dev->base[0]; + unsigned long flags; if (!rknpu_dev->config->bw_enable) { LOG_WARN("Clear rw_amount is not supported on this device!\n"); return 0; } - spin_lock(&rknpu_dev->lock); - if (rknpu_dev->config->pc_dma_ctrl) { - uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); + uint32_t pc_data_addr = 0; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); 
REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } else { + spin_lock(&rknpu_dev->lock); REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); + spin_unlock(&rknpu_dev->lock); } - - spin_unlock(&rknpu_dev->lock); return 0; } -- Gitblit v1.6.2
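
Note on the auto-scheduling change in this patch: when userspace passes RKNPU_CORE_AUTO_MASK, the hand-rolled three-way sort in rknpu_job_schedule() is replaced by the new rknpu_schedule_core_index(), which scans the per-subcore task_num counters and returns the core with the smallest backlog (ties keep the lower index; unlike the old code, an idle core is no longer preferred over a less-loaded busy one). The following is a minimal user-space sketch of that selection policy only; pick_least_loaded_core(), pending_tasks and the sample counts are illustrative stand-ins, not driver symbols.

#include <stdio.h>

/* Sketch of the RKNPU_CORE_AUTO_MASK policy: walk each core's pending
 * task counter and return the index with the fewest queued tasks. */
static int pick_least_loaded_core(const int *pending_tasks, int core_num)
{
	int core_index = 0;
	int task_num = pending_tasks[0];
	int i;

	for (i = 1; i < core_num; i++) {
		if (task_num > pending_tasks[i]) {
			core_index = i;
			task_num = pending_tasks[i];
		}
	}

	return core_index;
}

int main(void)
{
	/* Hypothetical per-core backlogs for a three-core NPU. */
	int pending_tasks[3] = { 12, 4, 9 };

	printf("auto-scheduled core: %d\n",
	       pick_least_loaded_core(pending_tasks, 3));
	return 0;
}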
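
Note on the split submission introduced here: rknpu_job_subcore_commit_pc() now programs at most max_submit_number tasks per commit, offsetting task_start by submit_count * max_submit_number, and rknpu_job_done() keeps re-committing the same subcore until submit_count reaches ceil(task_number / max_submit_number), which is the (n + max - 1) / max expression visible in the patch. The sketch below reproduces only that chunking arithmetic in user space; total_submits(), submit_window() and the sample sizes are illustrative, not driver functions.

#include <stdio.h>

/* Ceiling division used to decide how many commits a job needs. */
static int total_submits(int task_number, int max_submit_number)
{
	return (task_number + max_submit_number - 1) / max_submit_number;
}

/* Compute the task window for one commit, mirroring the submit_index
 * offsetting done in rknpu_job_subcore_commit_pc(): start moves forward
 * by whole chunks and the remaining count is capped at the chunk size. */
static void submit_window(int task_start, int task_number,
			  int max_submit_number, int submit_index,
			  int *start, int *count)
{
	*start = task_start + submit_index * max_submit_number;
	*count = task_number - submit_index * max_submit_number;
	if (*count > max_submit_number)
		*count = max_submit_number;
}

int main(void)
{
	/* Hypothetical job: 1000 tasks starting at 0, 512 tasks per commit. */
	int task_start = 0, task_number = 1000, max_submit = 512;
	int n = total_submits(task_number, max_submit);
	int i, start, count;

	for (i = 0; i < n; i++) {
		submit_window(task_start, task_number, max_submit, i,
			      &start, &count);
		printf("commit %d: tasks [%d..%d]\n", i, start,
		       start + count - 1);
	}
	return 0;
}

For example, 1000 tasks with max_submit_number = 512 yield two commits covering tasks 0-511 and 512-999, which matches the window the driver would program on the second pass through rknpu_job_done().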