2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/drivers/rknpu/rknpu_job.c
@@ -23,16 +23,25 @@
 #define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
 #define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)
 
-static int rknpu_core_index(int core_mask)
+static int rknpu_wait_core_index(int core_mask)
 {
 	int index = 0;
 
-	if (core_mask & RKNPU_CORE0_MASK)
+	switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
+	case RKNPU_CORE0_MASK:
+	case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
+	case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
 		index = 0;
-	else if (core_mask & RKNPU_CORE1_MASK)
+		break;
+	case RKNPU_CORE1_MASK:
 		index = 1;
-	else if (core_mask & RKNPU_CORE2_MASK)
+		break;
+	case RKNPU_CORE2_MASK:
 		index = 2;
+		break;
+	default:
+		break;
+	}
 
 	return index;
 }
@@ -58,14 +67,24 @@
 	return core_mask;
 }
 
-static int rknn_get_task_number(struct rknpu_job *job, int core_index)
+static int rknpu_get_task_number(struct rknpu_job *job, int core_index)
 {
+	struct rknpu_device *rknpu_dev = job->rknpu_dev;
 	int task_num = job->args->task_number;
 
-	if (job->use_core_num == 2)
-		task_num = job->args->subcore_task[core_index].task_number;
-	else if (job->use_core_num == 3)
-		task_num = job->args->subcore_task[core_index + 2].task_number;
+	if (core_index >= RKNPU_MAX_CORES || core_index < 0) {
+		LOG_ERROR("core_index: %d set error!", core_index);
+		return 0;
+	}
+
+	if (rknpu_dev->config->num_irqs > 1) {
+		if (job->use_core_num == 1 || job->use_core_num == 2)
+			task_num =
+				job->args->subcore_task[core_index].task_number;
+		else if (job->use_core_num == 3)
+			task_num = job->args->subcore_task[core_index + 2]
+					   .task_number;
+	}
 
 	return task_num;
 }
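Note: the hunk above changes how the per-core task count is looked up, so a minimal sketch may help follow the indexing. This is a standalone userspace illustration, not driver code; the struct layout, the MAX_CORES value, the helper name and the sample task splits are assumptions made only for the example (the num_irqs > 1 guard is omitted for brevity).

#include <stdio.h>

#define MAX_CORES 3 /* assumed stand-in for RKNPU_MAX_CORES */

struct subcore_task {
	int task_start;
	int task_number;
};

/* Mirrors the idea of rknpu_get_task_number(): jobs on 1 or 2 cores read
 * entries 0..1 of the subcore table, 3-core jobs read entries 2..4
 * (core_index + 2); out-of-range core indexes return 0. */
static int get_task_number(const struct subcore_task *tasks, int use_core_num,
			   int core_index, int total_tasks)
{
	if (core_index < 0 || core_index >= MAX_CORES)
		return 0;
	if (use_core_num == 1 || use_core_num == 2)
		return tasks[core_index].task_number;
	if (use_core_num == 3)
		return tasks[core_index + 2].task_number;
	return total_tasks;
}

int main(void)
{
	struct subcore_task tasks[5] = {
		{ 0, 6 }, { 6, 6 },          /* used by 1- and 2-core jobs */
		{ 0, 4 }, { 4, 4 }, { 8, 4 } /* used by 3-core jobs */
	};

	printf("%d\n", get_task_number(tasks, 3, 1, 12)); /* prints 4 */
	return 0;
}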
@@ -124,8 +143,8 @@
 	job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) +
 			    ((args->core_mask & RKNPU_CORE1_MASK) >> 1) +
 			    ((args->core_mask & RKNPU_CORE2_MASK) >> 2);
-	job->run_count = job->use_core_num;
-	job->interrupt_count = job->use_core_num;
+	atomic_set(&job->run_count, job->use_core_num);
+	atomic_set(&job->interrupt_count, job->use_core_num);
 #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
 	task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
 	if (task_obj)
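Note: the counters touched above become atomic_t so that several cores can decrement them without taking a lock. The following standalone C11 sketch (not driver code) shows the dec-and-test semantics this relies on; the helper name is made up for the example.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int run_count;

/* Userspace equivalent of atomic_dec_and_test(): returns true only for the
 * caller that brings the counter to zero, so exactly one path runs the
 * follow-up work (here, committing the job). */
static bool dec_and_test(atomic_int *v)
{
	return atomic_fetch_sub(v, 1) == 1;
}

int main(void)
{
	int use_core_num = 3;
	int core;

	atomic_store(&run_count, use_core_num);
	for (core = 0; core < use_core_num; core++) {
		if (dec_and_test(&run_count))
			printf("core %d is last, commit the job\n", core);
	}
	return 0;
}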
@@ -157,11 +176,14 @@
 	struct rknpu_submit *args = job->args;
 	struct rknpu_task *last_task = NULL;
 	struct rknpu_subcore_data *subcore_data = NULL;
+	struct rknpu_job *entry, *q;
 	void __iomem *rknpu_core_base = NULL;
-	int core_index = rknpu_core_index(job->args->core_mask);
+	int core_index = rknpu_wait_core_index(job->args->core_mask);
 	unsigned long flags;
 	int wait_count = 0;
+	bool continue_wait = false;
 	int ret = -EINVAL;
+	int i = 0;
 
 	subcore_data = &rknpu_dev->subcore_datas[core_index];
 
@@ -170,22 +192,47 @@
 					       job->flags & RKNPU_JOB_DONE ||
 						       rknpu_dev->soft_reseting,
 					       msecs_to_jiffies(args->timeout));
+
 		if (++wait_count >= 3)
 			break;
-	} while (ret == 0 && job->in_queue[core_index]);
 
-	if (job->in_queue[core_index]) {
-		spin_lock_irqsave(&rknpu_dev->lock, flags);
-		list_del_init(&job->head[core_index]);
-		subcore_data->task_num -= rknn_get_task_number(job, core_index);
-		job->in_queue[core_index] = false;
-		spin_unlock_irqrestore(&rknpu_dev->lock, flags);
-		return ret < 0 ? ret : -EINVAL;
-	}
+		if (ret == 0) {
+			int64_t commit_time = 0;
+			spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
+			commit_time = ktime_us_delta(ktime_get(),
						     job->commit_pc_time);
+			continue_wait =
+				job->commit_pc_time == 0 ?
+					true :
+					(commit_time < args->timeout * 1000);
+			spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
+			LOG_ERROR(
+				"job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n",
+				job, wait_count, continue_wait,
+				(job->commit_pc_time == 0 ? 0 : commit_time),
+				ktime_us_delta(ktime_get(), job->timestamp),
+				args->timeout * 1000);
+		}
+	} while (ret == 0 && continue_wait);
 
 	last_task = job->last_task;
-	if (!last_task)
+	if (!last_task) {
+		spin_lock_irqsave(&rknpu_dev->lock, flags);
+		for (i = 0; i < job->use_core_num; i++) {
+			subcore_data = &rknpu_dev->subcore_datas[i];
+			list_for_each_entry_safe(
+				entry, q, &subcore_data->todo_list, head[i]) {
+				if (entry == job) {
+					list_del(&job->head[i]);
+					break;
+				}
+			}
+		}
+		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
+
+		LOG_ERROR("job commit failed\n");
 		return ret < 0 ? ret : -EINVAL;
+	}
 
 	last_task->int_status = job->int_status[core_index];
 
@@ -193,8 +240,8 @@
 		args->task_counter = 0;
 		rknpu_core_base = rknpu_dev->base[core_index];
 		if (args->flags & RKNPU_JOB_PC) {
-			uint32_t task_status =
-				REG_READ(RKNPU_OFFSET_PC_TASK_STATUS);
+			uint32_t task_status = REG_READ(
+				rknpu_dev->config->pc_task_status_offset);
 			args->task_counter =
 				(task_status &
 				 rknpu_dev->config->pc_task_number_mask);
@@ -203,7 +250,7 @@
 		LOG_ERROR(
 			"failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
 			args->task_counter, args->flags, ret,
-			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
+			ktime_us_delta(ktime_get(), job->timestamp));
 
 		return ret < 0 ? ret : -ETIMEDOUT;
 	}
@@ -216,7 +263,8 @@
 	return 0;
 }
 
-static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index)
+static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job,
+					      int core_index)
 {
 	struct rknpu_device *rknpu_dev = job->rknpu_dev;
 	struct rknpu_submit *args = job->args;
@@ -233,15 +281,19 @@
 	struct rknpu_task *last_task = NULL;
 	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
 	int task_start = args->task_start;
-	int task_end = args->task_start + args->task_number - 1;
+	int task_end;
 	int task_number = args->task_number;
 	int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
 	int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
 	int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
 	int i = 0;
+	int submit_index = atomic_read(&job->submit_count[core_index]);
+	int max_submit_number = rknpu_dev->config->max_submit_number;
 
-	if (!task_obj)
-		return -EINVAL;
+	if (!task_obj) {
+		job->ret = -EINVAL;
+		return job->ret;
+	}
 
 	if (rknpu_dev->config->num_irqs > 1) {
 		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
@@ -251,38 +303,40 @@
 			}
 		}
 
-		if (job->use_core_num == 1) {
+		switch (job->use_core_num) {
+		case 1:
+		case 2:
 			task_start = args->subcore_task[core_index].task_start;
-			task_end = args->subcore_task[core_index].task_start +
-				   args->subcore_task[core_index].task_number -
-				   1;
 			task_number =
 				args->subcore_task[core_index].task_number;
-		} else if (job->use_core_num == 2) {
-			task_start = args->subcore_task[core_index].task_start;
-			task_end = args->subcore_task[core_index].task_start +
-				   args->subcore_task[core_index].task_number -
-				   1;
-			task_number =
-				args->subcore_task[core_index].task_number;
-		} else if (job->use_core_num == 3) {
+			break;
+		case 3:
 			task_start =
 				args->subcore_task[core_index + 2].task_start;
-			task_end =
-				args->subcore_task[core_index + 2].task_start +
-				args->subcore_task[core_index + 2].task_number -
-				1;
 			task_number =
 				args->subcore_task[core_index + 2].task_number;
+			break;
+		default:
+			LOG_ERROR("Unknown use core num %d\n",
+				  job->use_core_num);
+			break;
 		}
 	}
+
+	task_start = task_start + submit_index * max_submit_number;
+	task_number = task_number - submit_index * max_submit_number;
+	task_number = task_number > max_submit_number ? max_submit_number :
+							task_number;
+	task_end = task_start + task_number - 1;
 
 	task_base = task_obj->kv_addr;
 
 	first_task = &task_base[task_start];
 	last_task = &task_base[task_end];
 
+	spin_lock(&rknpu_dev->lock);
 	REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
+	spin_unlock(&rknpu_dev->lock);
 
 	REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
 		   pc_data_amount_scale - 1) /
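Note: the arithmetic added above splits one submission into chunks of at most max_submit_number tasks, selected by the per-core submit count. A standalone sketch of that arithmetic with made-up numbers (not driver code):

#include <stdio.h>

int main(void)
{
	int base_start = 0, total = 25, max_submit_number = 10;
	int submit_index;

	for (submit_index = 0; submit_index * max_submit_number < total;
	     submit_index++) {
		int task_start = base_start + submit_index * max_submit_number;
		int task_number = total - submit_index * max_submit_number;

		if (task_number > max_submit_number)
			task_number = max_submit_number;
		/* prints chunks 0-9, 10-19 and 20-24 */
		printf("chunk %d: start %d, end %d\n", submit_index,
		       task_start, task_start + task_number - 1);
	}
	return 0;
}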
@@ -309,19 +363,50 @@
 	return 0;
 }
 
-static int rknpu_job_commit(struct rknpu_job *job, int core_index)
+static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index)
 {
 	struct rknpu_device *rknpu_dev = job->rknpu_dev;
 	struct rknpu_submit *args = job->args;
 	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
 
 	// switch to slave mode
+	spin_lock(&rknpu_dev->lock);
 	REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
+	spin_unlock(&rknpu_dev->lock);
 
-	if (!(args->flags & RKNPU_JOB_PC))
-		return -EINVAL;
+	if (!(args->flags & RKNPU_JOB_PC)) {
+		job->ret = -EINVAL;
+		return job->ret;
+	}
 
-	return rknpu_job_commit_pc(job, core_index);
+	return rknpu_job_subcore_commit_pc(job, core_index);
+}
+
+static void rknpu_job_commit(struct rknpu_job *job)
+{
+	switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
+	case RKNPU_CORE0_MASK:
+		rknpu_job_subcore_commit(job, 0);
+		break;
+	case RKNPU_CORE1_MASK:
+		rknpu_job_subcore_commit(job, 1);
+		break;
+	case RKNPU_CORE2_MASK:
+		rknpu_job_subcore_commit(job, 2);
+		break;
+	case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
+		rknpu_job_subcore_commit(job, 0);
+		rknpu_job_subcore_commit(job, 1);
+		break;
+	case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
+		rknpu_job_subcore_commit(job, 0);
+		rknpu_job_subcore_commit(job, 1);
+		rknpu_job_subcore_commit(job, 2);
+		break;
+	default:
+		LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask);
+		break;
+	}
 }
 
 static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
@@ -346,19 +431,13 @@
 			       head[core_index]);
 
 	list_del_init(&job->head[core_index]);
-	job->in_queue[core_index] = false;
 	subcore_data->job = job;
-	job->run_count--;
 	job->hw_recoder_time = ktime_get();
+	job->commit_pc_time = job->hw_recoder_time;
 	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
 
-	if (job->run_count == 0) {
-		if (job->args->core_mask & RKNPU_CORE0_MASK)
-			job->ret = rknpu_job_commit(job, 0);
-		if (job->args->core_mask & RKNPU_CORE1_MASK)
-			job->ret = rknpu_job_commit(job, 1);
-		if (job->args->core_mask & RKNPU_CORE2_MASK)
-			job->ret = rknpu_job_commit(job, 2);
+	if (atomic_dec_and_test(&job->run_count)) {
+		rknpu_job_commit(job);
 	}
 }
 
@@ -367,19 +446,25 @@
 	struct rknpu_device *rknpu_dev = job->rknpu_dev;
 	struct rknpu_subcore_data *subcore_data = NULL;
 	unsigned long flags;
-	ktime_t now = ktime_get();
+	int max_submit_number = rknpu_dev->config->max_submit_number;
+
+	if (atomic_inc_return(&job->submit_count[core_index]) <
+	    (rknpu_get_task_number(job, core_index) + max_submit_number - 1) /
+		    max_submit_number) {
+		rknpu_job_commit(job);
+		return;
+	}
 
 	subcore_data = &rknpu_dev->subcore_datas[core_index];
 
 	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
 	subcore_data->job = NULL;
-	subcore_data->task_num -= rknn_get_task_number(job, core_index);
-	job->interrupt_count--;
+	subcore_data->task_num -= rknpu_get_task_number(job, core_index);
 	subcore_data->timer.busy_time +=
-		ktime_us_delta(now, job->hw_recoder_time);
+		ktime_us_delta(ktime_get(), job->hw_recoder_time);
 	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
 
-	if (job->interrupt_count == 0) {
+	if (atomic_dec_and_test(&job->interrupt_count)) {
 		int use_core_num = job->use_core_num;
 
 		job->flags |= RKNPU_JOB_DONE;
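Note: the interrupt path above re-commits the job until the submit count reaches the ceiling of task_number / max_submit_number. A standalone sketch of that check with made-up numbers (not driver code):

#include <stdio.h>

static int chunks_needed(int task_number, int max_submit_number)
{
	/* integer ceiling of task_number / max_submit_number */
	return (task_number + max_submit_number - 1) / max_submit_number;
}

int main(void)
{
	int task_number = 25, max_submit_number = 10;
	int submit_count = 0;

	/* mirrors: if (atomic_inc_return(&submit_count) < chunks) re-commit */
	while (++submit_count < chunks_needed(task_number, max_submit_number))
		printf("interrupt %d: commit the next chunk\n", submit_count);
	printf("interrupt %d: job complete\n", submit_count);
	return 0;
}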
@@ -409,7 +494,8 @@
 	int task_num_list[3] = { 0, 1, 2 };
 	int tmp = 0;
 
-	if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) {
+	if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) ==
+	    RKNPU_CORE_AUTO_MASK) {
 		if (rknpu_dev->subcore_datas[0].task_num >
 		    rknpu_dev->subcore_datas[1].task_num) {
 			tmp = task_num_list[1];
@@ -439,20 +525,19 @@
 
 		job->args->core_mask = rknpu_core_mask(core_index);
 		job->use_core_num = 1;
-		job->interrupt_count = 1;
-		job->run_count = 1;
+		atomic_set(&job->run_count, job->use_core_num);
+		atomic_set(&job->interrupt_count, job->use_core_num);
 	}
 
+	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
 	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
 		if (job->args->core_mask & rknpu_core_mask(i)) {
 			subcore_data = &rknpu_dev->subcore_datas[i];
-			spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
 			list_add_tail(&job->head[i], &subcore_data->todo_list);
-			subcore_data->task_num += rknn_get_task_number(job, i);
-			job->in_queue[i] = true;
-			spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
+			subcore_data->task_num += rknpu_get_task_number(job, i);
 		}
 	}
+	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
 
 	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
 		if (job->args->core_mask & rknpu_core_mask(i))
@@ -464,41 +549,49 @@
 {
 	struct rknpu_device *rknpu_dev = job->rknpu_dev;
 	struct rknpu_subcore_data *subcore_data = NULL;
-	int core_index = rknpu_core_index(job->args->core_mask);
-	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
 	unsigned long flags;
 	int i = 0;
 
 	msleep(100);
 
+	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
 	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
 		if (job->args->core_mask & rknpu_core_mask(i)) {
 			subcore_data = &rknpu_dev->subcore_datas[i];
-			spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
 			if (job == subcore_data->job && !job->irq_entry[i]) {
 				subcore_data->job = NULL;
 				subcore_data->task_num -=
-					rknn_get_task_number(job, i);
+					rknpu_get_task_number(job, i);
 			}
-			spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
 		}
 	}
+	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
 
 	if (job->ret == -ETIMEDOUT) {
-		LOG_ERROR(
-			"job timeout, flags: %#x, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
-			job->flags, REG_READ(RKNPU_OFFSET_INT_STATUS),
-			REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
-			job->int_mask[core_index],
-			(REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) &
-			 rknpu_dev->config->pc_task_number_mask),
-			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
+		LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
+		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
+			if (job->args->core_mask & rknpu_core_mask(i)) {
+				void __iomem *rknpu_core_base =
+					rknpu_dev->base[i];
+				LOG_ERROR(
+					"\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
+					i, REG_READ(RKNPU_OFFSET_INT_STATUS),
+					REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
+					job->int_mask[i],
+					(REG_READ(
						 rknpu_dev->config
							 ->pc_task_status_offset) &
					 rknpu_dev->config->pc_task_number_mask),
+					ktime_us_delta(ktime_get(),
						       job->timestamp));
+			}
+		}
 		rknpu_soft_reset(rknpu_dev);
 	} else {
 		LOG_ERROR(
			"job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
 			job->flags, job->ret,
-			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
+			ktime_us_delta(ktime_get(), job->timestamp));
 	}
 
 	rknpu_job_cleanup(job);
@@ -560,7 +653,7 @@
 			"invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n",
 			status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
 			job->int_mask[core_index],
-			(REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) &
+			(REG_READ(rknpu_dev->config->pc_task_status_offset) &
 			 rknpu_dev->config->pc_task_number_mask));
 		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
 		return IRQ_HANDLED;
@@ -593,7 +686,6 @@
 {
 	struct rknpu_job *job = NULL;
 	unsigned long flags;
-	ktime_t now = ktime_get();
 	struct rknpu_subcore_data *subcore_data = NULL;
 	int i = 0;
 
@@ -602,7 +694,7 @@
 		subcore_data = &rknpu_dev->subcore_datas[i];
 		job = subcore_data->job;
 		if (job &&
-		    ktime_to_ms(ktime_sub(now, job->timestamp)) >=
+		    ktime_us_delta(ktime_get(), job->timestamp) >=
 			    job->args->timeout) {
 			rknpu_soft_reset(rknpu_dev);
 
@@ -624,7 +716,6 @@
 							   struct rknpu_job,
 							   head[i]);
 				list_del_init(&job->head[i]);
-				job->in_queue[i] = false;
 			} else {
 				job = NULL;
 			}
@@ -779,7 +870,7 @@
 		return -EINVAL;
 
 	*version = REG_READ(RKNPU_OFFSET_VERSION) +
-		   REG_READ(RKNPU_OFFSET_VERSION_NUM);
+		   (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff);
 
 	return 0;
 }
@@ -853,8 +944,17 @@
 
 	spin_lock(&rknpu_dev->lock);
 
-	REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
-	REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
+	if (rknpu_dev->config->pc_dma_ctrl) {
+		uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);
+
+		REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
+		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
+		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
+		REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
+	} else {
+		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
+		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
+	}
 
 	spin_unlock(&rknpu_dev->lock);
 