hc
2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/drivers/rknpu/rknpu_job.c
....@@ -23,16 +23,25 @@
2323 #define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
2424 #define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)
2525
26
-static int rknpu_core_index(int core_mask)
26
+static int rknpu_wait_core_index(int core_mask)
2727 {
2828 int index = 0;
2929
30
- if (core_mask & RKNPU_CORE0_MASK)
30
+ switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
31
+ case RKNPU_CORE0_MASK:
32
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
33
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
3134 index = 0;
32
- else if (core_mask & RKNPU_CORE1_MASK)
35
+ break;
36
+ case RKNPU_CORE1_MASK:
3337 index = 1;
34
- else if (core_mask & RKNPU_CORE2_MASK)
38
+ break;
39
+ case RKNPU_CORE2_MASK:
3540 index = 2;
41
+ break;
42
+ default:
43
+ break;
44
+ }
3645
3746 return index;
3847 }
....@@ -58,14 +67,24 @@
5867 return core_mask;
5968 }
6069
61
-static int rknn_get_task_number(struct rknpu_job *job, int core_index)
70
+static int rknpu_get_task_number(struct rknpu_job *job, int core_index)
6271 {
72
+ struct rknpu_device *rknpu_dev = job->rknpu_dev;
6373 int task_num = job->args->task_number;
6474
65
- if (job->use_core_num == 2)
66
- task_num = job->args->subcore_task[core_index].task_number;
67
- else if (job->use_core_num == 3)
68
- task_num = job->args->subcore_task[core_index + 2].task_number;
75
+ if (core_index >= RKNPU_MAX_CORES || core_index < 0) {
76
+ LOG_ERROR("core_index: %d set error!", core_index);
77
+ return 0;
78
+ }
79
+
80
+ if (rknpu_dev->config->num_irqs > 1) {
81
+ if (job->use_core_num == 1 || job->use_core_num == 2)
82
+ task_num =
83
+ job->args->subcore_task[core_index].task_number;
84
+ else if (job->use_core_num == 3)
85
+ task_num = job->args->subcore_task[core_index + 2]
86
+ .task_number;
87
+ }
6988
7089 return task_num;
7190 }
....@@ -157,10 +176,12 @@
157176 struct rknpu_submit *args = job->args;
158177 struct rknpu_task *last_task = NULL;
159178 struct rknpu_subcore_data *subcore_data = NULL;
179
+ struct rknpu_job *entry, *q;
160180 void __iomem *rknpu_core_base = NULL;
161
- int core_index = rknpu_core_index(job->args->core_mask);
181
+ int core_index = rknpu_wait_core_index(job->args->core_mask);
162182 unsigned long flags;
163183 int wait_count = 0;
184
+ bool continue_wait = false;
164185 int ret = -EINVAL;
165186 int i = 0;
166187
....@@ -171,31 +192,47 @@
171192 job->flags & RKNPU_JOB_DONE ||
172193 rknpu_dev->soft_reseting,
173194 msecs_to_jiffies(args->timeout));
195
+
174196 if (++wait_count >= 3)
175197 break;
176
- } while (ret == 0 && job->in_queue[core_index]);
177198
178
- if (job->in_queue[core_index]) {
199
+ if (ret == 0) {
200
+ int64_t commit_time = 0;
201
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
202
+ commit_time = ktime_us_delta(ktime_get(),
203
+ job->commit_pc_time);
204
+ continue_wait =
205
+ job->commit_pc_time == 0 ?
206
+ true :
207
+ (commit_time < args->timeout * 1000);
208
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
209
+ LOG_ERROR(
210
+ "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n",
211
+ job, wait_count, continue_wait,
212
+ (job->commit_pc_time == 0 ? 0 : commit_time),
213
+ ktime_us_delta(ktime_get(), job->timestamp),
214
+ args->timeout * 1000);
215
+ }
216
+ } while (ret == 0 && continue_wait);
217
+
218
+ last_task = job->last_task;
219
+ if (!last_task) {
179220 spin_lock_irqsave(&rknpu_dev->lock, flags);
180
- subcore_data->task_num -= rknn_get_task_number(job, core_index);
181
- if (job->use_core_num == 1) {
182
- list_del_init(&job->head[core_index]);
183
- job->in_queue[core_index] = false;
184
- } else if (job->use_core_num > 1) {
185
- for (i = 0; i < job->use_core_num; i++) {
186
- if (job->in_queue[i]) {
187
- list_del_init(&job->head[i]);
188
- job->in_queue[i] = false;
221
+ for (i = 0; i < job->use_core_num; i++) {
222
+ subcore_data = &rknpu_dev->subcore_datas[i];
223
+ list_for_each_entry_safe(
224
+ entry, q, &subcore_data->todo_list, head[i]) {
225
+ if (entry == job) {
226
+ list_del(&job->head[i]);
227
+ break;
189228 }
190229 }
191230 }
192
- spin_unlock_irqrestore(&rknpu_dev->lock, flags);
231
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
232
+
233
+ LOG_ERROR("job commit failed\n");
193234 return ret < 0 ? ret : -EINVAL;
194235 }
195
-
196
- last_task = job->last_task;
197
- if (!last_task)
198
- return ret < 0 ? ret : -EINVAL;
199236
200237 last_task->int_status = job->int_status[core_index];
201238
....@@ -213,7 +250,7 @@
213250 LOG_ERROR(
214251 "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
215252 args->task_counter, args->flags, ret,
216
- ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
253
+ ktime_us_delta(ktime_get(), job->timestamp));
217254
218255 return ret < 0 ? ret : -ETIMEDOUT;
219256 }
....@@ -226,7 +263,8 @@
226263 return 0;
227264 }
228265
229
-static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index)
266
+static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job,
267
+ int core_index)
230268 {
231269 struct rknpu_device *rknpu_dev = job->rknpu_dev;
232270 struct rknpu_submit *args = job->args;
....@@ -243,15 +281,19 @@
243281 struct rknpu_task *last_task = NULL;
244282 void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
245283 int task_start = args->task_start;
246
- int task_end = args->task_start + args->task_number - 1;
284
+ int task_end;
247285 int task_number = args->task_number;
248286 int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
249287 int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
250288 int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
251289 int i = 0;
290
+ int submit_index = atomic_read(&job->submit_count[core_index]);
291
+ int max_submit_number = rknpu_dev->config->max_submit_number;
252292
253
- if (!task_obj)
254
- return -EINVAL;
293
+ if (!task_obj) {
294
+ job->ret = -EINVAL;
295
+ return job->ret;
296
+ }
255297
256298 if (rknpu_dev->config->num_irqs > 1) {
257299 for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
....@@ -261,38 +303,40 @@
261303 }
262304 }
263305
264
- if (job->use_core_num == 1) {
306
+ switch (job->use_core_num) {
307
+ case 1:
308
+ case 2:
265309 task_start = args->subcore_task[core_index].task_start;
266
- task_end = args->subcore_task[core_index].task_start +
267
- args->subcore_task[core_index].task_number -
268
- 1;
269310 task_number =
270311 args->subcore_task[core_index].task_number;
271
- } else if (job->use_core_num == 2) {
272
- task_start = args->subcore_task[core_index].task_start;
273
- task_end = args->subcore_task[core_index].task_start +
274
- args->subcore_task[core_index].task_number -
275
- 1;
276
- task_number =
277
- args->subcore_task[core_index].task_number;
278
- } else if (job->use_core_num == 3) {
312
+ break;
313
+ case 3:
279314 task_start =
280315 args->subcore_task[core_index + 2].task_start;
281
- task_end =
282
- args->subcore_task[core_index + 2].task_start +
283
- args->subcore_task[core_index + 2].task_number -
284
- 1;
285316 task_number =
286317 args->subcore_task[core_index + 2].task_number;
318
+ break;
319
+ default:
320
+ LOG_ERROR("Unknown use core num %d\n",
321
+ job->use_core_num);
322
+ break;
287323 }
288324 }
325
+
326
+ task_start = task_start + submit_index * max_submit_number;
327
+ task_number = task_number - submit_index * max_submit_number;
328
+ task_number = task_number > max_submit_number ? max_submit_number :
329
+ task_number;
330
+ task_end = task_start + task_number - 1;
289331
290332 task_base = task_obj->kv_addr;
291333
292334 first_task = &task_base[task_start];
293335 last_task = &task_base[task_end];
294336
337
+ spin_lock(&rknpu_dev->lock);
295338 REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
339
+ spin_unlock(&rknpu_dev->lock);
296340
297341 REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
298342 pc_data_amount_scale - 1) /
....@@ -319,19 +363,50 @@
319363 return 0;
320364 }
321365
322
-static int rknpu_job_commit(struct rknpu_job *job, int core_index)
366
+static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index)
323367 {
324368 struct rknpu_device *rknpu_dev = job->rknpu_dev;
325369 struct rknpu_submit *args = job->args;
326370 void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
327371
328372 // switch to slave mode
373
+ spin_lock(&rknpu_dev->lock);
329374 REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
375
+ spin_unlock(&rknpu_dev->lock);
330376
331
- if (!(args->flags & RKNPU_JOB_PC))
332
- return -EINVAL;
377
+ if (!(args->flags & RKNPU_JOB_PC)) {
378
+ job->ret = -EINVAL;
379
+ return job->ret;
380
+ }
333381
334
- return rknpu_job_commit_pc(job, core_index);
382
+ return rknpu_job_subcore_commit_pc(job, core_index);
383
+}
384
+
385
+static void rknpu_job_commit(struct rknpu_job *job)
386
+{
387
+ switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) {
388
+ case RKNPU_CORE0_MASK:
389
+ rknpu_job_subcore_commit(job, 0);
390
+ break;
391
+ case RKNPU_CORE1_MASK:
392
+ rknpu_job_subcore_commit(job, 1);
393
+ break;
394
+ case RKNPU_CORE2_MASK:
395
+ rknpu_job_subcore_commit(job, 2);
396
+ break;
397
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
398
+ rknpu_job_subcore_commit(job, 0);
399
+ rknpu_job_subcore_commit(job, 1);
400
+ break;
401
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
402
+ rknpu_job_subcore_commit(job, 0);
403
+ rknpu_job_subcore_commit(job, 1);
404
+ rknpu_job_subcore_commit(job, 2);
405
+ break;
406
+ default:
407
+ LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask);
408
+ break;
409
+ }
335410 }
336411
337412 static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
....@@ -356,18 +431,13 @@
356431 head[core_index]);
357432
358433 list_del_init(&job->head[core_index]);
359
- job->in_queue[core_index] = false;
360434 subcore_data->job = job;
361435 job->hw_recoder_time = ktime_get();
436
+ job->commit_pc_time = job->hw_recoder_time;
362437 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
363438
364439 if (atomic_dec_and_test(&job->run_count)) {
365
- if (job->args->core_mask & RKNPU_CORE0_MASK)
366
- job->ret = rknpu_job_commit(job, 0);
367
- if (job->args->core_mask & RKNPU_CORE1_MASK)
368
- job->ret = rknpu_job_commit(job, 1);
369
- if (job->args->core_mask & RKNPU_CORE2_MASK)
370
- job->ret = rknpu_job_commit(job, 2);
440
+ rknpu_job_commit(job);
371441 }
372442 }
373443
....@@ -376,15 +446,22 @@
376446 struct rknpu_device *rknpu_dev = job->rknpu_dev;
377447 struct rknpu_subcore_data *subcore_data = NULL;
378448 unsigned long flags;
379
- ktime_t now = ktime_get();
449
+ int max_submit_number = rknpu_dev->config->max_submit_number;
450
+
451
+ if (atomic_inc_return(&job->submit_count[core_index]) <
452
+ (rknpu_get_task_number(job, core_index) + max_submit_number - 1) /
453
+ max_submit_number) {
454
+ rknpu_job_commit(job);
455
+ return;
456
+ }
380457
381458 subcore_data = &rknpu_dev->subcore_datas[core_index];
382459
383460 spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
384461 subcore_data->job = NULL;
385
- subcore_data->task_num -= rknn_get_task_number(job, core_index);
462
+ subcore_data->task_num -= rknpu_get_task_number(job, core_index);
386463 subcore_data->timer.busy_time +=
387
- ktime_us_delta(now, job->hw_recoder_time);
464
+ ktime_us_delta(ktime_get(), job->hw_recoder_time);
388465 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
389466
390467 if (atomic_dec_and_test(&job->interrupt_count)) {
....@@ -417,7 +494,8 @@
417494 int task_num_list[3] = { 0, 1, 2 };
418495 int tmp = 0;
419496
420
- if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) {
497
+ if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) ==
498
+ RKNPU_CORE_AUTO_MASK) {
421499 if (rknpu_dev->subcore_datas[0].task_num >
422500 rknpu_dev->subcore_datas[1].task_num) {
423501 tmp = task_num_list[1];
....@@ -456,8 +534,7 @@
456534 if (job->args->core_mask & rknpu_core_mask(i)) {
457535 subcore_data = &rknpu_dev->subcore_datas[i];
458536 list_add_tail(&job->head[i], &subcore_data->todo_list);
459
- subcore_data->task_num += rknn_get_task_number(job, i);
460
- job->in_queue[i] = true;
537
+ subcore_data->task_num += rknpu_get_task_number(job, i);
461538 }
462539 }
463540 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
....@@ -477,18 +554,18 @@
477554
478555 msleep(100);
479556
557
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
480558 for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
481559 if (job->args->core_mask & rknpu_core_mask(i)) {
482560 subcore_data = &rknpu_dev->subcore_datas[i];
483
- spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
484561 if (job == subcore_data->job && !job->irq_entry[i]) {
485562 subcore_data->job = NULL;
486563 subcore_data->task_num -=
487
- rknn_get_task_number(job, i);
564
+ rknpu_get_task_number(job, i);
488565 }
489
- spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
490566 }
491567 }
568
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
492569
493570 if (job->ret == -ETIMEDOUT) {
494571 LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
....@@ -505,8 +582,8 @@
505582 rknpu_dev->config
506583 ->pc_task_status_offset) &
507584 rknpu_dev->config->pc_task_number_mask),
508
- ktime_to_us(ktime_sub(ktime_get(),
509
- job->timestamp)));
585
+ ktime_us_delta(ktime_get(),
586
+ job->timestamp));
510587 }
511588 }
512589 rknpu_soft_reset(rknpu_dev);
....@@ -514,7 +591,7 @@
514591 LOG_ERROR(
515592 "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
516593 job->flags, job->ret,
517
- ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
594
+ ktime_us_delta(ktime_get(), job->timestamp));
518595 }
519596
520597 rknpu_job_cleanup(job);
....@@ -609,7 +686,6 @@
609686 {
610687 struct rknpu_job *job = NULL;
611688 unsigned long flags;
612
- ktime_t now = ktime_get();
613689 struct rknpu_subcore_data *subcore_data = NULL;
614690 int i = 0;
615691
....@@ -618,7 +694,7 @@
618694 subcore_data = &rknpu_dev->subcore_datas[i];
619695 job = subcore_data->job;
620696 if (job &&
621
- ktime_to_ms(ktime_sub(now, job->timestamp)) >=
697
+ ktime_us_delta(ktime_get(), job->timestamp) >=
622698 job->args->timeout) {
623699 rknpu_soft_reset(rknpu_dev);
624700
....@@ -640,7 +716,6 @@
640716 struct rknpu_job,
641717 head[i]);
642718 list_del_init(&job->head[i]);
643
- job->in_queue[i] = false;
644719 } else {
645720 job = NULL;
646721 }