.. | .. |
---|
27 | 27 | { |
---|
28 | 28 | int index = 0; |
---|
29 | 29 | |
---|
30 | | - switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { |
---|
| 30 | + switch (core_mask) { |
---|
31 | 31 | case RKNPU_CORE0_MASK: |
---|
32 | 32 | case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: |
---|
33 | 33 | case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: |
---|
.. | .. |
---|
73 | 73 | int task_num = job->args->task_number; |
---|
74 | 74 | |
---|
75 | 75 | if (core_index >= RKNPU_MAX_CORES || core_index < 0) { |
---|
76 | | - LOG_ERROR("core_index: %d set error!", core_index); |
---|
| 76 | + LOG_ERROR("invalid rknpu core index: %d", core_index); |
---|
77 | 77 | return 0; |
---|
78 | 78 | } |
---|
79 | 79 | |
---|
.. | .. |
---|
131 | 131 | #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM |
---|
132 | 132 | struct rknpu_gem_object *task_obj = NULL; |
---|
133 | 133 | #endif |
---|
134 | | - if (rknpu_dev->config->num_irqs == 1) |
---|
135 | | - args->core_mask = RKNPU_CORE0_MASK; |
---|
136 | 134 | |
---|
137 | 135 | job = kzalloc(sizeof(*job), GFP_KERNEL); |
---|
138 | 136 | if (!job) |
---|
.. | .. |
---|
197 | 195 | break; |
---|
198 | 196 | |
---|
199 | 197 | if (ret == 0) { |
---|
200 | | - int64_t commit_time = 0; |
---|
| 198 | + int64_t elapse_time_us = 0; |
---|
201 | 199 | spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
---|
202 | | - commit_time = ktime_us_delta(ktime_get(), |
---|
203 | | - job->commit_pc_time); |
---|
| 200 | + elapse_time_us = ktime_us_delta(ktime_get(), |
---|
| 201 | + job->hw_commit_time); |
---|
204 | 202 | continue_wait = |
---|
205 | | - job->commit_pc_time == 0 ? |
---|
| 203 | + job->hw_commit_time == 0 ? |
---|
206 | 204 | true : |
---|
207 | | - (commit_time < args->timeout * 1000); |
---|
| 205 | + (elapse_time_us < args->timeout * 1000); |
---|
208 | 206 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
---|
209 | 207 | LOG_ERROR( |
---|
210 | | - "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n", |
---|
| 208 | + "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", |
---|
211 | 209 | job, wait_count, continue_wait, |
---|
212 | | - (job->commit_pc_time == 0 ? 0 : commit_time), |
---|
| 210 | + (job->hw_commit_time == 0 ? 0 : elapse_time_us), |
---|
213 | 211 | ktime_us_delta(ktime_get(), job->timestamp), |
---|
214 | 212 | args->timeout * 1000); |
---|
215 | 213 | } |
---|
.. | .. |
---|
217 | 215 | |
---|
218 | 216 | last_task = job->last_task; |
---|
219 | 217 | if (!last_task) { |
---|
220 | | - spin_lock_irqsave(&rknpu_dev->lock, flags); |
---|
| 218 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
---|
221 | 219 | for (i = 0; i < job->use_core_num; i++) { |
---|
222 | 220 | subcore_data = &rknpu_dev->subcore_datas[i]; |
---|
223 | 221 | list_for_each_entry_safe( |
---|
.. | .. |
---|
259 | 257 | return -EINVAL; |
---|
260 | 258 | |
---|
261 | 259 | args->task_counter = args->task_number; |
---|
| 260 | + args->hw_elapse_time = job->hw_elapse_time; |
---|
262 | 261 | |
---|
263 | 262 | return 0; |
---|
264 | 263 | } |
---|
.. | .. |
---|
289 | 288 | int i = 0; |
---|
290 | 289 | int submit_index = atomic_read(&job->submit_count[core_index]); |
---|
291 | 290 | int max_submit_number = rknpu_dev->config->max_submit_number; |
---|
| 291 | + unsigned long flags; |
---|
292 | 292 | |
---|
293 | 293 | if (!task_obj) { |
---|
294 | 294 | job->ret = -EINVAL; |
---|
.. | .. |
---|
334 | 334 | first_task = &task_base[task_start]; |
---|
335 | 335 | last_task = &task_base[task_end]; |
---|
336 | 336 | |
---|
337 | | - spin_lock(&rknpu_dev->lock); |
---|
338 | | - REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
---|
339 | | - spin_unlock(&rknpu_dev->lock); |
---|
| 337 | + if (rknpu_dev->config->pc_dma_ctrl) { |
---|
| 338 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
---|
| 339 | + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
---|
| 340 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
---|
| 341 | + } else { |
---|
| 342 | + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
---|
| 343 | + } |
---|
340 | 344 | |
---|
341 | 345 | REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + |
---|
342 | 346 | pc_data_amount_scale - 1) / |
---|
.. | .. |
---|
363 | 367 | return 0; |
---|
364 | 368 | } |
---|
365 | 369 | |
---|
366 | | -static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index) |
---|
| 370 | +static inline int rknpu_job_subcore_commit(struct rknpu_job *job, |
---|
| 371 | + int core_index) |
---|
367 | 372 | { |
---|
368 | 373 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
---|
369 | 374 | struct rknpu_submit *args = job->args; |
---|
370 | 375 | void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; |
---|
| 376 | + unsigned long flags; |
---|
371 | 377 | |
---|
372 | 378 | // switch to slave mode |
---|
373 | | - spin_lock(&rknpu_dev->lock); |
---|
374 | | - REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
---|
375 | | - spin_unlock(&rknpu_dev->lock); |
---|
| 379 | + if (rknpu_dev->config->pc_dma_ctrl) { |
---|
| 380 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
---|
| 381 | + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
---|
| 382 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
---|
| 383 | + } else { |
---|
| 384 | + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
---|
| 385 | + } |
---|
376 | 386 | |
---|
377 | 387 | if (!(args->flags & RKNPU_JOB_PC)) { |
---|
378 | 388 | job->ret = -EINVAL; |
---|
.. | .. |
---|
384 | 394 | |
---|
385 | 395 | static void rknpu_job_commit(struct rknpu_job *job) |
---|
386 | 396 | { |
---|
387 | | - switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { |
---|
| 397 | + switch (job->args->core_mask) { |
---|
388 | 398 | case RKNPU_CORE0_MASK: |
---|
389 | 399 | rknpu_job_subcore_commit(job, 0); |
---|
390 | 400 | break; |
---|
.. | .. |
---|
432 | 442 | |
---|
433 | 443 | list_del_init(&job->head[core_index]); |
---|
434 | 444 | subcore_data->job = job; |
---|
435 | | - job->hw_recoder_time = ktime_get(); |
---|
436 | | - job->commit_pc_time = job->hw_recoder_time; |
---|
| 445 | + job->hw_commit_time = ktime_get(); |
---|
| 446 | + job->hw_recoder_time = job->hw_commit_time; |
---|
437 | 447 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
---|
438 | 448 | |
---|
439 | 449 | if (atomic_dec_and_test(&job->run_count)) { |
---|
.. | .. |
---|
445 | 455 | { |
---|
446 | 456 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
---|
447 | 457 | struct rknpu_subcore_data *subcore_data = NULL; |
---|
| 458 | + ktime_t now; |
---|
448 | 459 | unsigned long flags; |
---|
449 | 460 | int max_submit_number = rknpu_dev->config->max_submit_number; |
---|
450 | 461 | |
---|
451 | 462 | if (atomic_inc_return(&job->submit_count[core_index]) < |
---|
452 | 463 | (rknpu_get_task_number(job, core_index) + max_submit_number - 1) / |
---|
453 | 464 | max_submit_number) { |
---|
454 | | - rknpu_job_commit(job); |
---|
| 465 | + rknpu_job_subcore_commit(job, core_index); |
---|
455 | 466 | return; |
---|
456 | 467 | } |
---|
457 | 468 | |
---|
.. | .. |
---|
460 | 471 | spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
---|
461 | 472 | subcore_data->job = NULL; |
---|
462 | 473 | subcore_data->task_num -= rknpu_get_task_number(job, core_index); |
---|
463 | | - subcore_data->timer.busy_time += |
---|
464 | | - ktime_us_delta(ktime_get(), job->hw_recoder_time); |
---|
| 474 | + now = ktime_get(); |
---|
| 475 | + job->hw_elapse_time = ktime_sub(now, job->hw_commit_time); |
---|
| 476 | + subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time); |
---|
465 | 477 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
---|
466 | 478 | |
---|
467 | 479 | if (atomic_dec_and_test(&job->interrupt_count)) { |
---|
.. | .. |
---|
485 | 497 | rknpu_job_next(rknpu_dev, core_index); |
---|
486 | 498 | } |
---|
487 | 499 | |
---|
| 500 | +static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev) |
---|
| 501 | +{ |
---|
| 502 | + int core_num = rknpu_dev->config->num_irqs; |
---|
| 503 | + int task_num = rknpu_dev->subcore_datas[0].task_num; |
---|
| 504 | + int core_index = 0; |
---|
| 505 | + int i = 0; |
---|
| 506 | + |
---|
| 507 | + for (i = 1; i < core_num; i++) { |
---|
| 508 | + if (task_num > rknpu_dev->subcore_datas[i].task_num) { |
---|
| 509 | + core_index = i; |
---|
| 510 | + task_num = rknpu_dev->subcore_datas[i].task_num; |
---|
| 511 | + } |
---|
| 512 | + } |
---|
| 513 | + |
---|
| 514 | + return core_index; |
---|
| 515 | +} |
---|
| 516 | + |
---|
488 | 517 | static void rknpu_job_schedule(struct rknpu_job *job) |
---|
489 | 518 | { |
---|
490 | 519 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
---|
491 | 520 | struct rknpu_subcore_data *subcore_data = NULL; |
---|
492 | 521 | int i = 0, core_index = 0; |
---|
493 | 522 | unsigned long flags; |
---|
494 | | - int task_num_list[3] = { 0, 1, 2 }; |
---|
495 | | - int tmp = 0; |
---|
496 | 523 | |
---|
497 | | - if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) == |
---|
498 | | - RKNPU_CORE_AUTO_MASK) { |
---|
499 | | - if (rknpu_dev->subcore_datas[0].task_num > |
---|
500 | | - rknpu_dev->subcore_datas[1].task_num) { |
---|
501 | | - tmp = task_num_list[1]; |
---|
502 | | - task_num_list[1] = task_num_list[0]; |
---|
503 | | - task_num_list[0] = tmp; |
---|
504 | | - } |
---|
505 | | - if (rknpu_dev->subcore_datas[task_num_list[0]].task_num > |
---|
506 | | - rknpu_dev->subcore_datas[2].task_num) { |
---|
507 | | - tmp = task_num_list[2]; |
---|
508 | | - task_num_list[2] = task_num_list[1]; |
---|
509 | | - task_num_list[1] = task_num_list[0]; |
---|
510 | | - task_num_list[0] = tmp; |
---|
511 | | - } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num > |
---|
512 | | - rknpu_dev->subcore_datas[2].task_num) { |
---|
513 | | - tmp = task_num_list[2]; |
---|
514 | | - task_num_list[2] = task_num_list[1]; |
---|
515 | | - task_num_list[1] = tmp; |
---|
516 | | - } |
---|
517 | | - if (!rknpu_dev->subcore_datas[task_num_list[0]].job) |
---|
518 | | - core_index = task_num_list[0]; |
---|
519 | | - else if (!rknpu_dev->subcore_datas[task_num_list[1]].job) |
---|
520 | | - core_index = task_num_list[1]; |
---|
521 | | - else if (!rknpu_dev->subcore_datas[task_num_list[2]].job) |
---|
522 | | - core_index = task_num_list[2]; |
---|
523 | | - else |
---|
524 | | - core_index = task_num_list[0]; |
---|
525 | | - |
---|
| 524 | + if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) { |
---|
| 525 | + core_index = rknpu_schedule_core_index(rknpu_dev); |
---|
526 | 526 | job->args->core_mask = rknpu_core_mask(core_index); |
---|
527 | 527 | job->use_core_num = 1; |
---|
528 | 528 | atomic_set(&job->run_count, job->use_core_num); |
---|
.. | .. |
---|
739 | 739 | return -EINVAL; |
---|
740 | 740 | } |
---|
741 | 741 | |
---|
| 742 | + if (args->core_mask > rknpu_dev->config->core_mask) { |
---|
| 743 | + LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask); |
---|
| 744 | + return -EINVAL; |
---|
| 745 | + } |
---|
| 746 | + |
---|
742 | 747 | job = rknpu_job_alloc(rknpu_dev, args); |
---|
743 | 748 | if (!job) { |
---|
744 | 749 | LOG_ERROR("failed to allocate rknpu job!\n"); |
---|
.. | .. |
---|
/*
 * Reset the NPU read/write bandwidth counters on core 0.
 *
 * Returns 0 always; devices without bandwidth counters (!bw_enable) just
 * log a warning and succeed as a no-op.
 */
int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];
	unsigned long flags;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Clear rw_amount is not supported on this device!\n");
		return 0;
	}

	if (rknpu_dev->config->pc_dma_ctrl) {
		uint32_t pc_data_addr = 0;

		/*
		 * PC_DATA_ADDR is shared with the job-commit path, which is
		 * driven from the IRQ handler — take irq_lock with interrupts
		 * disabled so the save/clear/restore sequence is atomic
		 * against it.
		 */
		spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
		pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);

		/*
		 * Switch to slave mode (0x1), pulse the clear-all bit
		 * (0x80000101 -> 0x00000101), then restore the saved PC
		 * address. The write order is hardware-mandated.
		 */
		REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
	} else {
		/*
		 * Without PC DMA control there is no IRQ-path contention on
		 * these registers; the plain device lock suffices for the
		 * clear-bit pulse.
		 */
		spin_lock(&rknpu_dev->lock);
		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		spin_unlock(&rknpu_dev->lock);
	}

	return 0;
}
---|