hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/rknpu/rknpu_job.c
....@@ -23,16 +23,25 @@
2323 #define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
2424 #define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)
2525
26
-static int rknpu_core_index(int core_mask)
26
+static int rknpu_wait_core_index(int core_mask)
2727 {
2828 int index = 0;
2929
30
- if (core_mask & RKNPU_CORE0_MASK)
30
+ switch (core_mask) {
31
+ case RKNPU_CORE0_MASK:
32
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
33
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
3134 index = 0;
32
- else if (core_mask & RKNPU_CORE1_MASK)
35
+ break;
36
+ case RKNPU_CORE1_MASK:
3337 index = 1;
34
- else if (core_mask & RKNPU_CORE2_MASK)
38
+ break;
39
+ case RKNPU_CORE2_MASK:
3540 index = 2;
41
+ break;
42
+ default:
43
+ break;
44
+ }
3645
3746 return index;
3847 }
....@@ -58,14 +67,24 @@
5867 return core_mask;
5968 }
6069
61
-static int rknn_get_task_number(struct rknpu_job *job, int core_index)
70
+static int rknpu_get_task_number(struct rknpu_job *job, int core_index)
6271 {
72
+ struct rknpu_device *rknpu_dev = job->rknpu_dev;
6373 int task_num = job->args->task_number;
6474
65
- if (job->use_core_num == 2)
66
- task_num = job->args->subcore_task[core_index].task_number;
67
- else if (job->use_core_num == 3)
68
- task_num = job->args->subcore_task[core_index + 2].task_number;
75
+ if (core_index >= RKNPU_MAX_CORES || core_index < 0) {
76
+ LOG_ERROR("invalid rknpu core index: %d", core_index);
77
+ return 0;
78
+ }
79
+
80
+ if (rknpu_dev->config->num_irqs > 1) {
81
+ if (job->use_core_num == 1 || job->use_core_num == 2)
82
+ task_num =
83
+ job->args->subcore_task[core_index].task_number;
84
+ else if (job->use_core_num == 3)
85
+ task_num = job->args->subcore_task[core_index + 2]
86
+ .task_number;
87
+ }
6988
7089 return task_num;
7190 }
....@@ -112,8 +131,6 @@
112131 #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
113132 struct rknpu_gem_object *task_obj = NULL;
114133 #endif
115
- if (rknpu_dev->config->num_irqs == 1)
116
- args->core_mask = RKNPU_CORE0_MASK;
117134
118135 job = kzalloc(sizeof(*job), GFP_KERNEL);
119136 if (!job)
....@@ -124,8 +141,8 @@
124141 job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) +
125142 ((args->core_mask & RKNPU_CORE1_MASK) >> 1) +
126143 ((args->core_mask & RKNPU_CORE2_MASK) >> 2);
127
- job->run_count = job->use_core_num;
128
- job->interrupt_count = job->use_core_num;
144
+ atomic_set(&job->run_count, job->use_core_num);
145
+ atomic_set(&job->interrupt_count, job->use_core_num);
129146 #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
130147 task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
131148 if (task_obj)
....@@ -157,35 +174,63 @@
157174 struct rknpu_submit *args = job->args;
158175 struct rknpu_task *last_task = NULL;
159176 struct rknpu_subcore_data *subcore_data = NULL;
177
+ struct rknpu_job *entry, *q;
160178 void __iomem *rknpu_core_base = NULL;
161
- int core_index = rknpu_core_index(job->args->core_mask);
179
+ int core_index = rknpu_wait_core_index(job->args->core_mask);
162180 unsigned long flags;
163181 int wait_count = 0;
182
+ bool continue_wait = false;
164183 int ret = -EINVAL;
184
+ int i = 0;
165185
166186 subcore_data = &rknpu_dev->subcore_datas[core_index];
167187
168188 do {
169
- ret = wait_event_interruptible_timeout(
170
- subcore_data->job_done_wq,
171
- job->flags & RKNPU_JOB_DONE || rknpu_dev->soft_reseting,
172
- msecs_to_jiffies(args->timeout));
189
+ ret = wait_event_timeout(subcore_data->job_done_wq,
190
+ job->flags & RKNPU_JOB_DONE ||
191
+ rknpu_dev->soft_reseting,
192
+ msecs_to_jiffies(args->timeout));
193
+
173194 if (++wait_count >= 3)
174195 break;
175
- } while (ret == 0 && job->in_queue[core_index]);
176196
177
- if (job->in_queue[core_index]) {
178
- spin_lock_irqsave(&rknpu_dev->lock, flags);
179
- list_del_init(&job->head[core_index]);
180
- subcore_data->task_num -= rknn_get_task_number(job, core_index);
181
- job->in_queue[core_index] = false;
182
- spin_unlock_irqrestore(&rknpu_dev->lock, flags);
183
- return ret < 0 ? ret : -EINVAL;
184
- }
197
+ if (ret == 0) {
198
+ int64_t elapse_time_us = 0;
199
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
200
+ elapse_time_us = ktime_us_delta(ktime_get(),
201
+ job->hw_commit_time);
202
+ continue_wait =
203
+ job->hw_commit_time == 0 ?
204
+ true :
205
+ (elapse_time_us < args->timeout * 1000);
206
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
207
+ LOG_ERROR(
208
+ "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n",
209
+ job, wait_count, continue_wait,
210
+ (job->hw_commit_time == 0 ? 0 : elapse_time_us),
211
+ ktime_us_delta(ktime_get(), job->timestamp),
212
+ args->timeout * 1000);
213
+ }
214
+ } while (ret == 0 && continue_wait);
185215
186216 last_task = job->last_task;
187
- if (!last_task)
217
+ if (!last_task) {
218
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
219
+ for (i = 0; i < job->use_core_num; i++) {
220
+ subcore_data = &rknpu_dev->subcore_datas[i];
221
+ list_for_each_entry_safe(
222
+ entry, q, &subcore_data->todo_list, head[i]) {
223
+ if (entry == job) {
224
+ list_del(&job->head[i]);
225
+ break;
226
+ }
227
+ }
228
+ }
229
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
230
+
231
+ LOG_ERROR("job commit failed\n");
188232 return ret < 0 ? ret : -EINVAL;
233
+ }
189234
190235 last_task->int_status = job->int_status[core_index];
191236
....@@ -193,8 +238,8 @@
193238 args->task_counter = 0;
194239 rknpu_core_base = rknpu_dev->base[core_index];
195240 if (args->flags & RKNPU_JOB_PC) {
196
- uint32_t task_status =
197
- REG_READ(RKNPU_OFFSET_PC_TASK_STATUS);
241
+ uint32_t task_status = REG_READ(
242
+ rknpu_dev->config->pc_task_status_offset);
198243 args->task_counter =
199244 (task_status &
200245 rknpu_dev->config->pc_task_number_mask);
....@@ -203,7 +248,7 @@
203248 LOG_ERROR(
204249 "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
205250 args->task_counter, args->flags, ret,
206
- ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
251
+ ktime_us_delta(ktime_get(), job->timestamp));
207252
208253 return ret < 0 ? ret : -ETIMEDOUT;
209254 }
....@@ -212,11 +257,13 @@
212257 return -EINVAL;
213258
214259 args->task_counter = args->task_number;
260
+ args->hw_elapse_time = job->hw_elapse_time;
215261
216262 return 0;
217263 }
218264
219
-static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index)
265
+static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job,
266
+ int core_index)
220267 {
221268 struct rknpu_device *rknpu_dev = job->rknpu_dev;
222269 struct rknpu_submit *args = job->args;
....@@ -233,15 +280,20 @@
233280 struct rknpu_task *last_task = NULL;
234281 void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
235282 int task_start = args->task_start;
236
- int task_end = args->task_start + args->task_number - 1;
283
+ int task_end;
237284 int task_number = args->task_number;
238285 int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
239286 int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
240287 int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
241288 int i = 0;
289
+ int submit_index = atomic_read(&job->submit_count[core_index]);
290
+ int max_submit_number = rknpu_dev->config->max_submit_number;
291
+ unsigned long flags;
242292
243
- if (!task_obj)
244
- return -EINVAL;
293
+ if (!task_obj) {
294
+ job->ret = -EINVAL;
295
+ return job->ret;
296
+ }
245297
246298 if (rknpu_dev->config->num_irqs > 1) {
247299 for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
....@@ -251,38 +303,44 @@
251303 }
252304 }
253305
254
- if (job->use_core_num == 1) {
306
+ switch (job->use_core_num) {
307
+ case 1:
308
+ case 2:
255309 task_start = args->subcore_task[core_index].task_start;
256
- task_end = args->subcore_task[core_index].task_start +
257
- args->subcore_task[core_index].task_number -
258
- 1;
259310 task_number =
260311 args->subcore_task[core_index].task_number;
261
- } else if (job->use_core_num == 2) {
262
- task_start = args->subcore_task[core_index].task_start;
263
- task_end = args->subcore_task[core_index].task_start +
264
- args->subcore_task[core_index].task_number -
265
- 1;
266
- task_number =
267
- args->subcore_task[core_index].task_number;
268
- } else if (job->use_core_num == 3) {
312
+ break;
313
+ case 3:
269314 task_start =
270315 args->subcore_task[core_index + 2].task_start;
271
- task_end =
272
- args->subcore_task[core_index + 2].task_start +
273
- args->subcore_task[core_index + 2].task_number -
274
- 1;
275316 task_number =
276317 args->subcore_task[core_index + 2].task_number;
318
+ break;
319
+ default:
320
+ LOG_ERROR("Unknown use core num %d\n",
321
+ job->use_core_num);
322
+ break;
277323 }
278324 }
325
+
326
+ task_start = task_start + submit_index * max_submit_number;
327
+ task_number = task_number - submit_index * max_submit_number;
328
+ task_number = task_number > max_submit_number ? max_submit_number :
329
+ task_number;
330
+ task_end = task_start + task_number - 1;
279331
280332 task_base = task_obj->kv_addr;
281333
282334 first_task = &task_base[task_start];
283335 last_task = &task_base[task_end];
284336
285
- REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
337
+ if (rknpu_dev->config->pc_dma_ctrl) {
338
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
339
+ REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
340
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
341
+ } else {
342
+ REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);
343
+ }
286344
287345 REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
288346 pc_data_amount_scale - 1) /
....@@ -309,19 +367,56 @@
309367 return 0;
310368 }
311369
312
-static int rknpu_job_commit(struct rknpu_job *job, int core_index)
370
+static inline int rknpu_job_subcore_commit(struct rknpu_job *job,
371
+ int core_index)
313372 {
314373 struct rknpu_device *rknpu_dev = job->rknpu_dev;
315374 struct rknpu_submit *args = job->args;
316375 void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
376
+ unsigned long flags;
317377
318378 // switch to slave mode
319
- REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
379
+ if (rknpu_dev->config->pc_dma_ctrl) {
380
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
381
+ REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
382
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
383
+ } else {
384
+ REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
385
+ }
320386
321
- if (!(args->flags & RKNPU_JOB_PC))
322
- return -EINVAL;
387
+ if (!(args->flags & RKNPU_JOB_PC)) {
388
+ job->ret = -EINVAL;
389
+ return job->ret;
390
+ }
323391
324
- return rknpu_job_commit_pc(job, core_index);
392
+ return rknpu_job_subcore_commit_pc(job, core_index);
393
+}
394
+
395
+static void rknpu_job_commit(struct rknpu_job *job)
396
+{
397
+ switch (job->args->core_mask) {
398
+ case RKNPU_CORE0_MASK:
399
+ rknpu_job_subcore_commit(job, 0);
400
+ break;
401
+ case RKNPU_CORE1_MASK:
402
+ rknpu_job_subcore_commit(job, 1);
403
+ break;
404
+ case RKNPU_CORE2_MASK:
405
+ rknpu_job_subcore_commit(job, 2);
406
+ break;
407
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK:
408
+ rknpu_job_subcore_commit(job, 0);
409
+ rknpu_job_subcore_commit(job, 1);
410
+ break;
411
+ case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK:
412
+ rknpu_job_subcore_commit(job, 0);
413
+ rknpu_job_subcore_commit(job, 1);
414
+ rknpu_job_subcore_commit(job, 2);
415
+ break;
416
+ default:
417
+ LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask);
418
+ break;
419
+ }
325420 }
326421
327422 static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
....@@ -346,19 +441,13 @@
346441 head[core_index]);
347442
348443 list_del_init(&job->head[core_index]);
349
- job->in_queue[core_index] = false;
350444 subcore_data->job = job;
351
- job->run_count--;
352
- job->hw_recoder_time = ktime_get();
445
+ job->hw_commit_time = ktime_get();
446
+ job->hw_recoder_time = job->hw_commit_time;
353447 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
354448
355
- if (job->run_count == 0) {
356
- if (job->args->core_mask & RKNPU_CORE0_MASK)
357
- job->ret = rknpu_job_commit(job, 0);
358
- if (job->args->core_mask & RKNPU_CORE1_MASK)
359
- job->ret = rknpu_job_commit(job, 1);
360
- if (job->args->core_mask & RKNPU_CORE2_MASK)
361
- job->ret = rknpu_job_commit(job, 2);
449
+ if (atomic_dec_and_test(&job->run_count)) {
450
+ rknpu_job_commit(job);
362451 }
363452 }
364453
....@@ -366,20 +455,28 @@
366455 {
367456 struct rknpu_device *rknpu_dev = job->rknpu_dev;
368457 struct rknpu_subcore_data *subcore_data = NULL;
458
+ ktime_t now;
369459 unsigned long flags;
370
- ktime_t now = ktime_get();
460
+ int max_submit_number = rknpu_dev->config->max_submit_number;
461
+
462
+ if (atomic_inc_return(&job->submit_count[core_index]) <
463
+ (rknpu_get_task_number(job, core_index) + max_submit_number - 1) /
464
+ max_submit_number) {
465
+ rknpu_job_subcore_commit(job, core_index);
466
+ return;
467
+ }
371468
372469 subcore_data = &rknpu_dev->subcore_datas[core_index];
373470
374471 spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
375472 subcore_data->job = NULL;
376
- subcore_data->task_num -= rknn_get_task_number(job, core_index);
377
- job->interrupt_count--;
378
- subcore_data->timer.busy_time +=
379
- ktime_us_delta(now, job->hw_recoder_time);
473
+ subcore_data->task_num -= rknpu_get_task_number(job, core_index);
474
+ now = ktime_get();
475
+ job->hw_elapse_time = ktime_sub(now, job->hw_commit_time);
476
+ subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time);
380477 spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
381478
382
- if (job->interrupt_count == 0) {
479
+ if (atomic_dec_and_test(&job->interrupt_count)) {
383480 int use_core_num = job->use_core_num;
384481
385482 job->flags |= RKNPU_JOB_DONE;
....@@ -400,59 +497,47 @@
400497 rknpu_job_next(rknpu_dev, core_index);
401498 }
402499
500
+static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev)
501
+{
502
+ int core_num = rknpu_dev->config->num_irqs;
503
+ int task_num = rknpu_dev->subcore_datas[0].task_num;
504
+ int core_index = 0;
505
+ int i = 0;
506
+
507
+ for (i = 1; i < core_num; i++) {
508
+ if (task_num > rknpu_dev->subcore_datas[i].task_num) {
509
+ core_index = i;
510
+ task_num = rknpu_dev->subcore_datas[i].task_num;
511
+ }
512
+ }
513
+
514
+ return core_index;
515
+}
516
+
403517 static void rknpu_job_schedule(struct rknpu_job *job)
404518 {
405519 struct rknpu_device *rknpu_dev = job->rknpu_dev;
406520 struct rknpu_subcore_data *subcore_data = NULL;
407521 int i = 0, core_index = 0;
408522 unsigned long flags;
409
- int task_num_list[3] = { 0, 1, 2 };
410
- int tmp = 0;
411523
412
- if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) {
413
- if (rknpu_dev->subcore_datas[0].task_num >
414
- rknpu_dev->subcore_datas[1].task_num) {
415
- tmp = task_num_list[1];
416
- task_num_list[1] = task_num_list[0];
417
- task_num_list[0] = tmp;
418
- }
419
- if (rknpu_dev->subcore_datas[task_num_list[0]].task_num >
420
- rknpu_dev->subcore_datas[2].task_num) {
421
- tmp = task_num_list[2];
422
- task_num_list[2] = task_num_list[1];
423
- task_num_list[1] = task_num_list[0];
424
- task_num_list[0] = tmp;
425
- } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num >
426
- rknpu_dev->subcore_datas[2].task_num) {
427
- tmp = task_num_list[2];
428
- task_num_list[2] = task_num_list[1];
429
- task_num_list[1] = tmp;
430
- }
431
- if (!rknpu_dev->subcore_datas[task_num_list[0]].job)
432
- core_index = task_num_list[0];
433
- else if (!rknpu_dev->subcore_datas[task_num_list[1]].job)
434
- core_index = task_num_list[1];
435
- else if (!rknpu_dev->subcore_datas[task_num_list[2]].job)
436
- core_index = task_num_list[2];
437
- else
438
- core_index = task_num_list[0];
439
-
524
+ if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) {
525
+ core_index = rknpu_schedule_core_index(rknpu_dev);
440526 job->args->core_mask = rknpu_core_mask(core_index);
441527 job->use_core_num = 1;
442
- job->interrupt_count = 1;
443
- job->run_count = 1;
528
+ atomic_set(&job->run_count, job->use_core_num);
529
+ atomic_set(&job->interrupt_count, job->use_core_num);
444530 }
445531
532
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
446533 for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
447534 if (job->args->core_mask & rknpu_core_mask(i)) {
448535 subcore_data = &rknpu_dev->subcore_datas[i];
449
- spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
450536 list_add_tail(&job->head[i], &subcore_data->todo_list);
451
- subcore_data->task_num += rknn_get_task_number(job, i);
452
- job->in_queue[i] = true;
453
- spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
537
+ subcore_data->task_num += rknpu_get_task_number(job, i);
454538 }
455539 }
540
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
456541
457542 for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
458543 if (job->args->core_mask & rknpu_core_mask(i))
....@@ -464,41 +549,49 @@
464549 {
465550 struct rknpu_device *rknpu_dev = job->rknpu_dev;
466551 struct rknpu_subcore_data *subcore_data = NULL;
467
- int core_index = rknpu_core_index(job->args->core_mask);
468
- void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
469552 unsigned long flags;
470553 int i = 0;
471554
472555 msleep(100);
473556
557
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
474558 for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
475559 if (job->args->core_mask & rknpu_core_mask(i)) {
476560 subcore_data = &rknpu_dev->subcore_datas[i];
477
- spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
478561 if (job == subcore_data->job && !job->irq_entry[i]) {
479562 subcore_data->job = NULL;
480563 subcore_data->task_num -=
481
- rknn_get_task_number(job, i);
564
+ rknpu_get_task_number(job, i);
482565 }
483
- spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
484566 }
485567 }
568
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
486569
487570 if (job->ret == -ETIMEDOUT) {
488
- LOG_ERROR(
489
- "job timeout, flags: %#x, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
490
- job->flags, REG_READ(RKNPU_OFFSET_INT_STATUS),
491
- REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
492
- job->int_mask[core_index],
493
- (REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) &
494
- rknpu_dev->config->pc_task_number_mask),
495
- ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
571
+ LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
572
+ for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
573
+ if (job->args->core_mask & rknpu_core_mask(i)) {
574
+ void __iomem *rknpu_core_base =
575
+ rknpu_dev->base[i];
576
+ LOG_ERROR(
577
+ "\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
578
+ i, REG_READ(RKNPU_OFFSET_INT_STATUS),
579
+ REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
580
+ job->int_mask[i],
581
+ (REG_READ(
582
+ rknpu_dev->config
583
+ ->pc_task_status_offset) &
584
+ rknpu_dev->config->pc_task_number_mask),
585
+ ktime_us_delta(ktime_get(),
586
+ job->timestamp));
587
+ }
588
+ }
496589 rknpu_soft_reset(rknpu_dev);
497590 } else {
498591 LOG_ERROR(
499592 "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
500593 job->flags, job->ret,
501
- ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
594
+ ktime_us_delta(ktime_get(), job->timestamp));
502595 }
503596
504597 rknpu_job_cleanup(job);
....@@ -560,7 +653,7 @@
560653 "invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n",
561654 status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
562655 job->int_mask[core_index],
563
- (REG_READ(RKNPU_OFFSET_PC_TASK_STATUS) &
656
+ (REG_READ(rknpu_dev->config->pc_task_status_offset) &
564657 rknpu_dev->config->pc_task_number_mask));
565658 REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
566659 return IRQ_HANDLED;
....@@ -593,7 +686,6 @@
593686 {
594687 struct rknpu_job *job = NULL;
595688 unsigned long flags;
596
- ktime_t now = ktime_get();
597689 struct rknpu_subcore_data *subcore_data = NULL;
598690 int i = 0;
599691
....@@ -602,7 +694,7 @@
602694 subcore_data = &rknpu_dev->subcore_datas[i];
603695 job = subcore_data->job;
604696 if (job &&
605
- ktime_to_ms(ktime_sub(now, job->timestamp)) >=
697
+ ktime_us_delta(ktime_get(), job->timestamp) >=
606698 job->args->timeout) {
607699 rknpu_soft_reset(rknpu_dev);
608700
....@@ -624,7 +716,6 @@
624716 struct rknpu_job,
625717 head[i]);
626718 list_del_init(&job->head[i]);
627
- job->in_queue[i] = false;
628719 } else {
629720 job = NULL;
630721 }
....@@ -645,6 +736,11 @@
645736
646737 if (args->task_number == 0) {
647738 LOG_ERROR("invalid rknpu task number!\n");
739
+ return -EINVAL;
740
+ }
741
+
742
+ if (args->core_mask > rknpu_dev->config->core_mask) {
743
+ LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask);
648744 return -EINVAL;
649745 }
650746
....@@ -779,7 +875,7 @@
779875 return -EINVAL;
780876
781877 *version = REG_READ(RKNPU_OFFSET_VERSION) +
782
- REG_READ(RKNPU_OFFSET_VERSION_NUM);
878
+ (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff);
783879
784880 return 0;
785881 }
....@@ -845,18 +941,30 @@
845941 int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
846942 {
847943 void __iomem *rknpu_core_base = rknpu_dev->base[0];
944
+ unsigned long flags;
848945
849946 if (!rknpu_dev->config->bw_enable) {
850947 LOG_WARN("Clear rw_amount is not supported on this device!\n");
851948 return 0;
852949 }
853950
854
- spin_lock(&rknpu_dev->lock);
951
+ if (rknpu_dev->config->pc_dma_ctrl) {
952
+ uint32_t pc_data_addr = 0;
855953
856
- REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
857
- REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
954
+ spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
955
+ pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);
858956
859
- spin_unlock(&rknpu_dev->lock);
957
+ REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
958
+ REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
959
+ REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
960
+ REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
961
+ spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
962
+ } else {
963
+ spin_lock(&rknpu_dev->lock);
964
+ REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
965
+ REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
966
+ spin_unlock(&rknpu_dev->lock);
967
+ }
860968
861969 return 0;
862970 }