From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/drivers/rknpu/rknpu_job.c | 221 +++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 148 insertions(+), 73 deletions(-) diff --git a/kernel/drivers/rknpu/rknpu_job.c b/kernel/drivers/rknpu/rknpu_job.c index 6a167c4..f0f1dd7 100644 --- a/kernel/drivers/rknpu/rknpu_job.c +++ b/kernel/drivers/rknpu/rknpu_job.c @@ -23,16 +23,25 @@ #define REG_READ(offset) _REG_READ(rknpu_core_base, offset) #define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset) -static int rknpu_core_index(int core_mask) +static int rknpu_wait_core_index(int core_mask) { int index = 0; - if (core_mask & RKNPU_CORE0_MASK) + switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { + case RKNPU_CORE0_MASK: + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: index = 0; - else if (core_mask & RKNPU_CORE1_MASK) + break; + case RKNPU_CORE1_MASK: index = 1; - else if (core_mask & RKNPU_CORE2_MASK) + break; + case RKNPU_CORE2_MASK: index = 2; + break; + default: + break; + } return index; } @@ -58,14 +67,24 @@ return core_mask; } -static int rknn_get_task_number(struct rknpu_job *job, int core_index) +static int rknpu_get_task_number(struct rknpu_job *job, int core_index) { + struct rknpu_device *rknpu_dev = job->rknpu_dev; int task_num = job->args->task_number; - if (job->use_core_num == 2) - task_num = job->args->subcore_task[core_index].task_number; - else if (job->use_core_num == 3) - task_num = job->args->subcore_task[core_index + 2].task_number; + if (core_index >= RKNPU_MAX_CORES || core_index < 0) { + LOG_ERROR("core_index: %d set error!", core_index); + return 0; + } + + if (rknpu_dev->config->num_irqs > 1) { + if (job->use_core_num == 1 || job->use_core_num == 2) + task_num = + job->args->subcore_task[core_index].task_number; + else if (job->use_core_num == 3) + task_num = job->args->subcore_task[core_index + 2] + .task_number; + } return task_num; } @@ -157,10 +176,12 @@ struct rknpu_submit *args = job->args; struct rknpu_task *last_task = NULL; struct rknpu_subcore_data *subcore_data = NULL; + struct rknpu_job *entry, *q; void __iomem *rknpu_core_base = NULL; - int core_index = rknpu_core_index(job->args->core_mask); + int core_index = rknpu_wait_core_index(job->args->core_mask); unsigned long flags; int wait_count = 0; + bool continue_wait = false; int ret = -EINVAL; int i = 0; @@ -171,31 +192,47 @@ job->flags & RKNPU_JOB_DONE || rknpu_dev->soft_reseting, msecs_to_jiffies(args->timeout)); + if (++wait_count >= 3) break; - } while (ret == 0 && job->in_queue[core_index]); - if (job->in_queue[core_index]) { + if (ret == 0) { + int64_t commit_time = 0; + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + commit_time = ktime_us_delta(ktime_get(), + job->commit_pc_time); + continue_wait = + job->commit_pc_time == 0 ? + true : + (commit_time < args->timeout * 1000); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + LOG_ERROR( + "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n", + job, wait_count, continue_wait, + (job->commit_pc_time == 0 ? 0 : commit_time), + ktime_us_delta(ktime_get(), job->timestamp), + args->timeout * 1000); + } + } while (ret == 0 && continue_wait); + + last_task = job->last_task; + if (!last_task) { spin_lock_irqsave(&rknpu_dev->lock, flags); - subcore_data->task_num -= rknn_get_task_number(job, core_index); - if (job->use_core_num == 1) { - list_del_init(&job->head[core_index]); - job->in_queue[core_index] = false; - } else if (job->use_core_num > 1) { - for (i = 0; i < job->use_core_num; i++) { - if (job->in_queue[i]) { - list_del_init(&job->head[i]); - job->in_queue[i] = false; + for (i = 0; i < job->use_core_num; i++) { + subcore_data = &rknpu_dev->subcore_datas[i]; + list_for_each_entry_safe( + entry, q, &subcore_data->todo_list, head[i]) { + if (entry == job) { + list_del(&job->head[i]); + break; } } } - spin_unlock_irqrestore(&rknpu_dev->lock, flags); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + LOG_ERROR("job commit failed\n"); return ret < 0 ? ret : -EINVAL; } - - last_task = job->last_task; - if (!last_task) - return ret < 0 ? ret : -EINVAL; last_task->int_status = job->int_status[core_index]; @@ -213,7 +250,7 @@ LOG_ERROR( "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n", args->task_counter, args->flags, ret, - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + ktime_us_delta(ktime_get(), job->timestamp)); return ret < 0 ? ret : -ETIMEDOUT; } @@ -226,7 +263,8 @@ return 0; } -static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) +static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job, + int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_submit *args = job->args; @@ -243,15 +281,19 @@ struct rknpu_task *last_task = NULL; void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; int task_start = args->task_start; - int task_end = args->task_start + args->task_number - 1; + int task_end; int task_number = args->task_number; int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0; int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale; int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits; int i = 0; + int submit_index = atomic_read(&job->submit_count[core_index]); + int max_submit_number = rknpu_dev->config->max_submit_number; - if (!task_obj) - return -EINVAL; + if (!task_obj) { + job->ret = -EINVAL; + return job->ret; + } if (rknpu_dev->config->num_irqs > 1) { for (i = 0; i < rknpu_dev->config->num_irqs; i++) { @@ -261,38 +303,40 @@ } } - if (job->use_core_num == 1) { + switch (job->use_core_num) { + case 1: + case 2: task_start = args->subcore_task[core_index].task_start; - task_end = args->subcore_task[core_index].task_start + - args->subcore_task[core_index].task_number - - 1; task_number = args->subcore_task[core_index].task_number; - } else if (job->use_core_num == 2) { - task_start = args->subcore_task[core_index].task_start; - task_end = args->subcore_task[core_index].task_start + - args->subcore_task[core_index].task_number - - 1; - task_number = - args->subcore_task[core_index].task_number; - } else if (job->use_core_num == 3) { + break; + case 3: task_start = args->subcore_task[core_index + 2].task_start; - task_end = - args->subcore_task[core_index + 2].task_start + - args->subcore_task[core_index + 2].task_number - - 1; task_number = args->subcore_task[core_index + 2].task_number; + break; + default: + LOG_ERROR("Unknown use core num %d\n", + job->use_core_num); + break; } } + + task_start = task_start + submit_index * max_submit_number; + task_number = task_number - submit_index * max_submit_number; + task_number = task_number > max_submit_number ? max_submit_number : + task_number; + task_end = task_start + task_number - 1; task_base = task_obj->kv_addr; first_task = &task_base[task_start]; last_task = &task_base[task_end]; + spin_lock(&rknpu_dev->lock); REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); + spin_unlock(&rknpu_dev->lock); REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + pc_data_amount_scale - 1) / @@ -319,19 +363,50 @@ return 0; } -static int rknpu_job_commit(struct rknpu_job *job, int core_index) +static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_submit *args = job->args; void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; // switch to slave mode + spin_lock(&rknpu_dev->lock); REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); + spin_unlock(&rknpu_dev->lock); - if (!(args->flags & RKNPU_JOB_PC)) - return -EINVAL; + if (!(args->flags & RKNPU_JOB_PC)) { + job->ret = -EINVAL; + return job->ret; + } - return rknpu_job_commit_pc(job, core_index); + return rknpu_job_subcore_commit_pc(job, core_index); +} + +static void rknpu_job_commit(struct rknpu_job *job) +{ + switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { + case RKNPU_CORE0_MASK: + rknpu_job_subcore_commit(job, 0); + break; + case RKNPU_CORE1_MASK: + rknpu_job_subcore_commit(job, 1); + break; + case RKNPU_CORE2_MASK: + rknpu_job_subcore_commit(job, 2); + break; + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: + rknpu_job_subcore_commit(job, 0); + rknpu_job_subcore_commit(job, 1); + break; + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: + rknpu_job_subcore_commit(job, 0); + rknpu_job_subcore_commit(job, 1); + rknpu_job_subcore_commit(job, 2); + break; + default: + LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask); + break; + } } static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) @@ -356,18 +431,13 @@ head[core_index]); list_del_init(&job->head[core_index]); - job->in_queue[core_index] = false; subcore_data->job = job; job->hw_recoder_time = ktime_get(); + job->commit_pc_time = job->hw_recoder_time; spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (atomic_dec_and_test(&job->run_count)) { - if (job->args->core_mask & RKNPU_CORE0_MASK) - job->ret = rknpu_job_commit(job, 0); - if (job->args->core_mask & RKNPU_CORE1_MASK) - job->ret = rknpu_job_commit(job, 1); - if (job->args->core_mask & RKNPU_CORE2_MASK) - job->ret = rknpu_job_commit(job, 2); + rknpu_job_commit(job); } } @@ -376,15 +446,22 @@ struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; unsigned long flags; - ktime_t now = ktime_get(); + int max_submit_number = rknpu_dev->config->max_submit_number; + + if (atomic_inc_return(&job->submit_count[core_index]) < + (rknpu_get_task_number(job, core_index) + max_submit_number - 1) / + max_submit_number) { + rknpu_job_commit(job); + return; + } subcore_data = &rknpu_dev->subcore_datas[core_index]; spin_lock_irqsave(&rknpu_dev->irq_lock, flags); subcore_data->job = NULL; - subcore_data->task_num -= rknn_get_task_number(job, core_index); + subcore_data->task_num -= rknpu_get_task_number(job, core_index); subcore_data->timer.busy_time += - ktime_us_delta(now, job->hw_recoder_time); + ktime_us_delta(ktime_get(), job->hw_recoder_time); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (atomic_dec_and_test(&job->interrupt_count)) { @@ -417,7 +494,8 @@ int task_num_list[3] = { 0, 1, 2 }; int tmp = 0; - if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) { + if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) == + RKNPU_CORE_AUTO_MASK) { if (rknpu_dev->subcore_datas[0].task_num > rknpu_dev->subcore_datas[1].task_num) { tmp = task_num_list[1]; @@ -456,8 +534,7 @@ if (job->args->core_mask & rknpu_core_mask(i)) { subcore_data = &rknpu_dev->subcore_datas[i]; list_add_tail(&job->head[i], &subcore_data->todo_list); - subcore_data->task_num += rknn_get_task_number(job, i); - job->in_queue[i] = true; + subcore_data->task_num += rknpu_get_task_number(job, i); } } spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); @@ -477,18 +554,18 @@ msleep(100); + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); for (i = 0; i < rknpu_dev->config->num_irqs; i++) { if (job->args->core_mask & rknpu_core_mask(i)) { subcore_data = &rknpu_dev->subcore_datas[i]; - spin_lock_irqsave(&rknpu_dev->irq_lock, flags); if (job == subcore_data->job && !job->irq_entry[i]) { subcore_data->job = NULL; subcore_data->task_num -= - rknn_get_task_number(job, i); + rknpu_get_task_number(job, i); } - spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); } } + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); if (job->ret == -ETIMEDOUT) { LOG_ERROR("job timeout, flags: %#x:\n", job->flags); @@ -505,8 +582,8 @@ rknpu_dev->config ->pc_task_status_offset) & rknpu_dev->config->pc_task_number_mask), - ktime_to_us(ktime_sub(ktime_get(), - job->timestamp))); + ktime_us_delta(ktime_get(), + job->timestamp)); } } rknpu_soft_reset(rknpu_dev); @@ -514,7 +591,7 @@ LOG_ERROR( "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n", job->flags, job->ret, - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + ktime_us_delta(ktime_get(), job->timestamp)); } rknpu_job_cleanup(job); @@ -609,7 +686,6 @@ { struct rknpu_job *job = NULL; unsigned long flags; - ktime_t now = ktime_get(); struct rknpu_subcore_data *subcore_data = NULL; int i = 0; @@ -618,7 +694,7 @@ subcore_data = &rknpu_dev->subcore_datas[i]; job = subcore_data->job; if (job && - ktime_to_ms(ktime_sub(now, job->timestamp)) >= + ktime_us_delta(ktime_get(), job->timestamp) >= job->args->timeout) { rknpu_soft_reset(rknpu_dev); @@ -640,7 +716,6 @@ struct rknpu_job, head[i]); list_del_init(&job->head[i]); - job->in_queue[i] = false; } else { job = NULL; } -- Gitblit v1.6.2