hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/rknpu/rknpu_job.c
....@@ -27,7 +27,7 @@
2727 {
2828 int index = 0;
2929
30
- switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
30
+ switch (core_mask) {
3131 case RKNPU_CORE0_MASK:
3232 case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
3333 case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
....@@ -73,7 +73,7 @@
7373 int task_num = job->args->task_number;
7474
7575 if (core_index >= RKNPU_MAX_CORES || core_index < 0) {
76
- LOG_ERROR("core_index: %d set error!", core_index);
76
+ LOG_ERROR("invalid rknpu core index: %d", core_index);
7777 return 0;
7878 }
7979
....@@ -131,8 +131,6 @@
131131 #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
132132 struct rknpu_gem_object *task_obj = NULL;
133133 #endif
134
- if (rknpu_dev->config->num_irqs == 1)
135
- args->core_mask = RKNPU_CORE0_MASK;
136134
137135 job = kzalloc(sizeof(*job), GFP_KERNEL);
138136 if (!job)
....@@ -197,19 +195,19 @@
197195 break;
198196
199197 if (ret == 0) {
200
- int64_t commit_time = 0;
198
+ int64_t elapse_time_us = 0;
201199 spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
202
- commit_time = ktime_us_delta(ktime_get(),
203
- job->commit_pc_time);
200
+ elapse_time_us = ktime_us_delta(ktime_get(),
201
+ job->hw_commit_time);
204202 continue_wait =
205
- job->commit_pc_time == 0 ?
203
+ job->hw_commit_time == 0 ?
206204 true :
207
- (commit_time < args->timeout * 1000);
205
+ (elapse_time_us < args->timeout * 1000);
208206 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
209207 LOG_ERROR(
210
- "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n",
208
+ "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n",
211209 job, wait_count, continue_wait,
212
- (job->commit_pc_time == 0 ? 0 : commit_time),
210
+ (job->hw_commit_time == 0 ? 0 : elapse_time_us),
213211 ktime_us_delta(ktime_get(), job->timestamp),
214212 args->timeout * 1000);
215213 }
....@@ -217,7 +215,7 @@
217215
218216 last_task = job->last_task;
219217 if (!last_task) {
220
- spin_lock_irqsave(&rknpu_dev->lock, flags);
218
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
221219 for (i = 0; i < job->use_core_num; i++) {
222220 subcore_data = &rknpu_dev->subcore_datas[i];
223221 list_for_each_entry_safe(
....@@ -259,6 +257,7 @@
259257 return -EINVAL;
260258
261259 args->task_counter = args->task_number;
260
+ args->hw_elapse_time = job->hw_elapse_time;
262261
263262 return 0;
264263 }
....@@ -289,6 +288,7 @@
289288 int i = 0;
290289 int submit_index = atomic_read(&job->submit_count[core_index]);
291290 int max_submit_number = rknpu_dev->config->max_submit_number;
291
+ unsigned long flags;
292292
293293 if (!task_obj) {
294294 job->ret = -EINVAL;
....@@ -334,9 +334,13 @@
334334 first_task = &task_base[task_start];
335335 last_task = &task_base[task_end];
336336
337
- spin_lock(&rknpu_dev->lock);
338
- REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
339
- spin_unlock(&rknpu_dev->lock);
337
+ if (rknpu_dev->config->pc_dma_ctrl) {
338
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
339
+ REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
340
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
341
+ } else {
342
+ REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
343
+ }
340344
341345 REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
342346 pc_data_amount_scale - 1) /
....@@ -363,16 +367,22 @@
363367 return 0;
364368 }
365369
366
-static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index)
370
+static inline int rknpu_job_subcore_commit(struct rknpu_job *job,
371
+ int core_index)
367372 {
368373 struct rknpu_device *rknpu_dev = job->rknpu_dev;
369374 struct rknpu_submit *args = job->args;
370375 void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
376
+ unsigned long flags;
371377
372378 // switch to slave mode
373
- spin_lock(&rknpu_dev->lock);
374
- REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
375
- spin_unlock(&rknpu_dev->lock);
379
+ if (rknpu_dev->config->pc_dma_ctrl) {
380
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
381
+ REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
382
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
383
+ } else {
384
+ REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
385
+ }
376386
377387 if (!(args->flags & RKNPU_JOB_PC)) {
378388 job->ret = -EINVAL;
....@@ -384,7 +394,7 @@
384394
385395 static void rknpu_job_commit(struct rknpu_job *job)
386396 {
387
- switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
397
+ switch (job->args->core_mask) {
388398 case RKNPU_CORE0_MASK:
389399 rknpu_job_subcore_commit(job, 0);
390400 break;
....@@ -432,8 +442,8 @@
432442
433443 list_del_init(&job->head[core_index]);
434444 subcore_data->job = job;
435
- job->hw_recoder_time = ktime_get();
436
- job->commit_pc_time = job->hw_recoder_time;
445
+ job->hw_commit_time = ktime_get();
446
+ job->hw_recoder_time = job->hw_commit_time;
437447 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
438448
439449 if (atomic_dec_and_test(&job->run_count)) {
....@@ -445,13 +455,14 @@
445455 {
446456 struct rknpu_device *rknpu_dev = job->rknpu_dev;
447457 struct rknpu_subcore_data *subcore_data = NULL;
458
+ ktime_t now;
448459 unsigned long flags;
449460 int max_submit_number = rknpu_dev->config->max_submit_number;
450461
451462 if (atomic_inc_return(&job->submit_count[core_index]) <
452463 (rknpu_get_task_number(job, core_index) + max_submit_number - 1) /
453464 max_submit_number) {
454
- rknpu_job_commit(job);
465
+ rknpu_job_subcore_commit(job, core_index);
455466 return;
456467 }
457468
....@@ -460,8 +471,9 @@
460471 spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
461472 subcore_data->job = NULL;
462473 subcore_data->task_num -= rknpu_get_task_number(job, core_index);
463
- subcore_data->timer.busy_time +=
464
- ktime_us_delta(ktime_get(), job->hw_recoder_time);
474
+ now = ktime_get();
475
+ job->hw_elapse_time = ktime_sub(now, job->hw_commit_time);
476
+ subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time);
465477 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
466478
467479 if (atomic_dec_and_test(&job->interrupt_count)) {
....@@ -485,44 +497,32 @@
485497 rknpu_job_next(rknpu_dev, core_index);
486498 }
487499
500
+static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev)
501
+{
502
+ int core_num = rknpu_dev->config->num_irqs;
503
+ int task_num = rknpu_dev->subcore_datas[0].task_num;
504
+ int core_index = 0;
505
+ int i = 0;
506
+
507
+ for (i = 1; i < core_num; i++) {
508
+ if (task_num > rknpu_dev->subcore_datas[i].task_num) {
509
+ core_index = i;
510
+ task_num = rknpu_dev->subcore_datas[i].task_num;
511
+ }
512
+ }
513
+
514
+ return core_index;
515
+}
516
+
488517 static void rknpu_job_schedule(struct rknpu_job *job)
489518 {
490519 struct rknpu_device *rknpu_dev = job->rknpu_dev;
491520 struct rknpu_subcore_data *subcore_data = NULL;
492521 int i = 0, core_index = 0;
493522 unsigned long flags;
494
- int task_num_list[3] = { 0, 1, 2 };
495
- int tmp = 0;
496523
497
- if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) ==
498
- RKNPU_CORE_AUTO_MASK) {
499
- if (rknpu_dev->subcore_datas[0].task_num >
500
- rknpu_dev->subcore_datas[1].task_num) {
501
- tmp = task_num_list[1];
502
- task_num_list[1] = task_num_list[0];
503
- task_num_list[0] = tmp;
504
- }
505
- if (rknpu_dev->subcore_datas[task_num_list[0]].task_num >
506
- rknpu_dev->subcore_datas[2].task_num) {
507
- tmp = task_num_list[2];
508
- task_num_list[2] = task_num_list[1];
509
- task_num_list[1] = task_num_list[0];
510
- task_num_list[0] = tmp;
511
- } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num >
512
- rknpu_dev->subcore_datas[2].task_num) {
513
- tmp = task_num_list[2];
514
- task_num_list[2] = task_num_list[1];
515
- task_num_list[1] = tmp;
516
- }
517
- if (!rknpu_dev->subcore_datas[task_num_list[0]].job)
518
- core_index = task_num_list[0];
519
- else if (!rknpu_dev->subcore_datas[task_num_list[1]].job)
520
- core_index = task_num_list[1];
521
- else if (!rknpu_dev->subcore_datas[task_num_list[2]].job)
522
- core_index = task_num_list[2];
523
- else
524
- core_index = task_num_list[0];
525
-
524
+ if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) {
525
+ core_index = rknpu_schedule_core_index(rknpu_dev);
526526 job->args->core_mask = rknpu_core_mask(core_index);
527527 job->use_core_num = 1;
528528 atomic_set(&job->run_count, job->use_core_num);
....@@ -739,6 +739,11 @@
739739 return -EINVAL;
740740 }
741741
742
+ if (args->core_mask > rknpu_dev->config->core_mask) {
743
+ LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask);
744
+ return -EINVAL;
745
+ }
746
+
742747 job = rknpu_job_alloc(rknpu_dev, args);
743748 if (!job) {
744749 LOG_ERROR("failed to allocate rknpu job!\n");
....@@ -936,27 +941,30 @@
936941 int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
937942 {
938943 void __iomem *rknpu_core_base = rknpu_dev->base[0];
944
+ unsigned long flags;
939945
940946 if (!rknpu_dev->config->bw_enable) {
941947 LOG_WARN("Clear rw_amount is not supported on this device!\n");
942948 return 0;
943949 }
944950
945
- spin_lock(&rknpu_dev->lock);
946
-
947951 if (rknpu_dev->config->pc_dma_ctrl) {
948
- uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);
952
+ uint32_t pc_data_addr = 0;
953
+
954
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
955
+ pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);
949956
950957 REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
951958 REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
952959 REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
953960 REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
961
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
954962 } else {
963
+ spin_lock(&rknpu_dev->lock);
955964 REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
956965 REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
966
+ spin_unlock(&rknpu_dev->lock);
957967 }
958
-
959
- spin_unlock(&rknpu_dev->lock);
960968
961969 return 0;
962970 }