| .. | .. |
|---|
| 27 | 27 | { |
|---|
| 28 | 28 | int index = 0; |
|---|
| 29 | 29 | |
|---|
| 30 | | - switch (core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { |
|---|
| 30 | + switch (core_mask) { |
|---|
| 31 | 31 | case RKNPU_CORE0_MASK: |
|---|
| 32 | 32 | case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: |
|---|
| 33 | 33 | case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: |
|---|
| .. | .. |
|---|
| 73 | 73 | int task_num = job->args->task_number; |
|---|
| 74 | 74 | |
|---|
| 75 | 75 | if (core_index >= RKNPU_MAX_CORES || core_index < 0) { |
|---|
| 76 | | - LOG_ERROR("core_index: %d set error!", core_index); |
|---|
| 76 | + LOG_ERROR("invalid rknpu core index: %d", core_index); |
|---|
| 77 | 77 | return 0; |
|---|
| 78 | 78 | } |
|---|
| 79 | 79 | |
|---|
| .. | .. |
|---|
| 131 | 131 | #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM |
|---|
| 132 | 132 | struct rknpu_gem_object *task_obj = NULL; |
|---|
| 133 | 133 | #endif |
|---|
| 134 | | - if (rknpu_dev->config->num_irqs == 1) |
|---|
| 135 | | - args->core_mask = RKNPU_CORE0_MASK; |
|---|
| 136 | 134 | |
|---|
| 137 | 135 | job = kzalloc(sizeof(*job), GFP_KERNEL); |
|---|
| 138 | 136 | if (!job) |
|---|
| .. | .. |
|---|
| 197 | 195 | break; |
|---|
| 198 | 196 | |
|---|
| 199 | 197 | if (ret == 0) { |
|---|
| 200 | | - int64_t commit_time = 0; |
|---|
| 198 | + int64_t elapse_time_us = 0; |
|---|
| 201 | 199 | spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 202 | | - commit_time = ktime_us_delta(ktime_get(), |
|---|
| 203 | | - job->commit_pc_time); |
|---|
| 200 | + elapse_time_us = ktime_us_delta(ktime_get(), |
|---|
| 201 | + job->hw_commit_time); |
|---|
| 204 | 202 | continue_wait = |
|---|
| 205 | | - job->commit_pc_time == 0 ? |
|---|
| 203 | + job->hw_commit_time == 0 ? |
|---|
| 206 | 204 | true : |
|---|
| 207 | | - (commit_time < args->timeout * 1000); |
|---|
| 205 | + (elapse_time_us < args->timeout * 1000); |
|---|
| 208 | 206 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 209 | 207 | LOG_ERROR( |
|---|
| 210 | | - "job: %p, wait_count: %d, continue_wait: %d, commit time: %lldus, wait time: %lldus, timeout time: %uus\n", |
|---|
| 208 | + "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", |
|---|
| 211 | 209 | job, wait_count, continue_wait, |
|---|
| 212 | | - (job->commit_pc_time == 0 ? 0 : commit_time), |
|---|
| 210 | + (job->hw_commit_time == 0 ? 0 : elapse_time_us), |
|---|
| 213 | 211 | ktime_us_delta(ktime_get(), job->timestamp), |
|---|
| 214 | 212 | args->timeout * 1000); |
|---|
| 215 | 213 | } |
|---|
| .. | .. |
|---|
| 217 | 215 | |
|---|
| 218 | 216 | last_task = job->last_task; |
|---|
| 219 | 217 | if (!last_task) { |
|---|
| 220 | | - spin_lock_irqsave(&rknpu_dev->lock, flags); |
|---|
| 218 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 221 | 219 | for (i = 0; i < job->use_core_num; i++) { |
|---|
| 222 | 220 | subcore_data = &rknpu_dev->subcore_datas[i]; |
|---|
| 223 | 221 | list_for_each_entry_safe( |
|---|
| .. | .. |
|---|
| 259 | 257 | return -EINVAL; |
|---|
| 260 | 258 | |
|---|
| 261 | 259 | args->task_counter = args->task_number; |
|---|
| 260 | + args->hw_elapse_time = job->hw_elapse_time; |
|---|
| 262 | 261 | |
|---|
| 263 | 262 | return 0; |
|---|
| 264 | 263 | } |
|---|
| .. | .. |
|---|
| 289 | 288 | int i = 0; |
|---|
| 290 | 289 | int submit_index = atomic_read(&job->submit_count[core_index]); |
|---|
| 291 | 290 | int max_submit_number = rknpu_dev->config->max_submit_number; |
|---|
| 291 | + unsigned long flags; |
|---|
| 292 | 292 | |
|---|
| 293 | 293 | if (!task_obj) { |
|---|
| 294 | 294 | job->ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 334 | 334 | first_task = &task_base[task_start]; |
|---|
| 335 | 335 | last_task = &task_base[task_end]; |
|---|
| 336 | 336 | |
|---|
| 337 | | - spin_lock(&rknpu_dev->lock); |
|---|
| 338 | | - REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 339 | | - spin_unlock(&rknpu_dev->lock); |
|---|
| 337 | + if (rknpu_dev->config->pc_dma_ctrl) { |
|---|
| 338 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 339 | + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 340 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 341 | + } else { |
|---|
| 342 | + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 343 | + } |
|---|
| 340 | 344 | |
|---|
| 341 | 345 | REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + |
|---|
| 342 | 346 | pc_data_amount_scale - 1) / |
|---|
| .. | .. |
|---|
| 363 | 367 | return 0; |
|---|
| 364 | 368 | } |
|---|
| 365 | 369 | |
|---|
| 366 | | -static inline int rknpu_job_subcore_commit(struct rknpu_job *job, int core_index) |
|---|
| 370 | +static inline int rknpu_job_subcore_commit(struct rknpu_job *job, |
|---|
| 371 | + int core_index) |
|---|
| 367 | 372 | { |
|---|
| 368 | 373 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 369 | 374 | struct rknpu_submit *args = job->args; |
|---|
| 370 | 375 | void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; |
|---|
| 376 | + unsigned long flags; |
|---|
| 371 | 377 | |
|---|
| 372 | 378 | // switch to slave mode |
|---|
| 373 | | - spin_lock(&rknpu_dev->lock); |
|---|
| 374 | | - REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 375 | | - spin_unlock(&rknpu_dev->lock); |
|---|
| 379 | + if (rknpu_dev->config->pc_dma_ctrl) { |
|---|
| 380 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 381 | + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 382 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 383 | + } else { |
|---|
| 384 | + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 385 | + } |
|---|
| 376 | 386 | |
|---|
| 377 | 387 | if (!(args->flags & RKNPU_JOB_PC)) { |
|---|
| 378 | 388 | job->ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 384 | 394 | |
|---|
| 385 | 395 | static void rknpu_job_commit(struct rknpu_job *job) |
|---|
| 386 | 396 | { |
|---|
| 387 | | - switch (job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) { |
|---|
| 397 | + switch (job->args->core_mask) { |
|---|
| 388 | 398 | case RKNPU_CORE0_MASK: |
|---|
| 389 | 399 | rknpu_job_subcore_commit(job, 0); |
|---|
| 390 | 400 | break; |
|---|
| .. | .. |
|---|
| 432 | 442 | |
|---|
| 433 | 443 | list_del_init(&job->head[core_index]); |
|---|
| 434 | 444 | subcore_data->job = job; |
|---|
| 435 | | - job->hw_recoder_time = ktime_get(); |
|---|
| 436 | | - job->commit_pc_time = job->hw_recoder_time; |
|---|
| 445 | + job->hw_commit_time = ktime_get(); |
|---|
| 446 | + job->hw_recoder_time = job->hw_commit_time; |
|---|
| 437 | 447 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 438 | 448 | |
|---|
| 439 | 449 | if (atomic_dec_and_test(&job->run_count)) { |
|---|
| .. | .. |
|---|
| 445 | 455 | { |
|---|
| 446 | 456 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 447 | 457 | struct rknpu_subcore_data *subcore_data = NULL; |
|---|
| 458 | + ktime_t now; |
|---|
| 448 | 459 | unsigned long flags; |
|---|
| 449 | 460 | int max_submit_number = rknpu_dev->config->max_submit_number; |
|---|
| 450 | 461 | |
|---|
| 451 | 462 | if (atomic_inc_return(&job->submit_count[core_index]) < |
|---|
| 452 | 463 | (rknpu_get_task_number(job, core_index) + max_submit_number - 1) / |
|---|
| 453 | 464 | max_submit_number) { |
|---|
| 454 | | - rknpu_job_commit(job); |
|---|
| 465 | + rknpu_job_subcore_commit(job, core_index); |
|---|
| 455 | 466 | return; |
|---|
| 456 | 467 | } |
|---|
| 457 | 468 | |
|---|
| .. | .. |
|---|
| 460 | 471 | spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 461 | 472 | subcore_data->job = NULL; |
|---|
| 462 | 473 | subcore_data->task_num -= rknpu_get_task_number(job, core_index); |
|---|
| 463 | | - subcore_data->timer.busy_time += |
|---|
| 464 | | - ktime_us_delta(ktime_get(), job->hw_recoder_time); |
|---|
| 474 | + now = ktime_get(); |
|---|
| 475 | + job->hw_elapse_time = ktime_sub(now, job->hw_commit_time); |
|---|
| 476 | + subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time); |
|---|
| 465 | 477 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 466 | 478 | |
|---|
| 467 | 479 | if (atomic_dec_and_test(&job->interrupt_count)) { |
|---|
| .. | .. |
|---|
| 485 | 497 | rknpu_job_next(rknpu_dev, core_index); |
|---|
| 486 | 498 | } |
|---|
| 487 | 499 | |
|---|
| 500 | +static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev) |
|---|
| 501 | +{ |
|---|
| 502 | + int core_num = rknpu_dev->config->num_irqs; |
|---|
| 503 | + int task_num = rknpu_dev->subcore_datas[0].task_num; |
|---|
| 504 | + int core_index = 0; |
|---|
| 505 | + int i = 0; |
|---|
| 506 | + |
|---|
| 507 | + for (i = 1; i < core_num; i++) { |
|---|
| 508 | + if (task_num > rknpu_dev->subcore_datas[i].task_num) { |
|---|
| 509 | + core_index = i; |
|---|
| 510 | + task_num = rknpu_dev->subcore_datas[i].task_num; |
|---|
| 511 | + } |
|---|
| 512 | + } |
|---|
| 513 | + |
|---|
| 514 | + return core_index; |
|---|
| 515 | +} |
|---|
| 516 | + |
|---|
| 488 | 517 | static void rknpu_job_schedule(struct rknpu_job *job) |
|---|
| 489 | 518 | { |
|---|
| 490 | 519 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 491 | 520 | struct rknpu_subcore_data *subcore_data = NULL; |
|---|
| 492 | 521 | int i = 0, core_index = 0; |
|---|
| 493 | 522 | unsigned long flags; |
|---|
| 494 | | - int task_num_list[3] = { 0, 1, 2 }; |
|---|
| 495 | | - int tmp = 0; |
|---|
| 496 | 523 | |
|---|
| 497 | | - if ((job->args->core_mask & ((1 << RKNPU_MAX_CORES) - 1)) == |
|---|
| 498 | | - RKNPU_CORE_AUTO_MASK) { |
|---|
| 499 | | - if (rknpu_dev->subcore_datas[0].task_num > |
|---|
| 500 | | - rknpu_dev->subcore_datas[1].task_num) { |
|---|
| 501 | | - tmp = task_num_list[1]; |
|---|
| 502 | | - task_num_list[1] = task_num_list[0]; |
|---|
| 503 | | - task_num_list[0] = tmp; |
|---|
| 504 | | - } |
|---|
| 505 | | - if (rknpu_dev->subcore_datas[task_num_list[0]].task_num > |
|---|
| 506 | | - rknpu_dev->subcore_datas[2].task_num) { |
|---|
| 507 | | - tmp = task_num_list[2]; |
|---|
| 508 | | - task_num_list[2] = task_num_list[1]; |
|---|
| 509 | | - task_num_list[1] = task_num_list[0]; |
|---|
| 510 | | - task_num_list[0] = tmp; |
|---|
| 511 | | - } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num > |
|---|
| 512 | | - rknpu_dev->subcore_datas[2].task_num) { |
|---|
| 513 | | - tmp = task_num_list[2]; |
|---|
| 514 | | - task_num_list[2] = task_num_list[1]; |
|---|
| 515 | | - task_num_list[1] = tmp; |
|---|
| 516 | | - } |
|---|
| 517 | | - if (!rknpu_dev->subcore_datas[task_num_list[0]].job) |
|---|
| 518 | | - core_index = task_num_list[0]; |
|---|
| 519 | | - else if (!rknpu_dev->subcore_datas[task_num_list[1]].job) |
|---|
| 520 | | - core_index = task_num_list[1]; |
|---|
| 521 | | - else if (!rknpu_dev->subcore_datas[task_num_list[2]].job) |
|---|
| 522 | | - core_index = task_num_list[2]; |
|---|
| 523 | | - else |
|---|
| 524 | | - core_index = task_num_list[0]; |
|---|
| 525 | | - |
|---|
| 524 | + if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) { |
|---|
| 525 | + core_index = rknpu_schedule_core_index(rknpu_dev); |
|---|
| 526 | 526 | job->args->core_mask = rknpu_core_mask(core_index); |
|---|
| 527 | 527 | job->use_core_num = 1; |
|---|
| 528 | 528 | atomic_set(&job->run_count, job->use_core_num); |
|---|
| .. | .. |
|---|
| 739 | 739 | return -EINVAL; |
|---|
| 740 | 740 | } |
|---|
| 741 | 741 | |
|---|
| 742 | + if (args->core_mask > rknpu_dev->config->core_mask) { |
|---|
| 743 | + LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask); |
|---|
| 744 | + return -EINVAL; |
|---|
| 745 | + } |
|---|
| 746 | + |
|---|
| 742 | 747 | job = rknpu_job_alloc(rknpu_dev, args); |
|---|
| 743 | 748 | if (!job) { |
|---|
| 744 | 749 | LOG_ERROR("failed to allocate rknpu job!\n"); |
|---|
| .. | .. |
|---|
| 936 | 941 | int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev) |
|---|
| 937 | 942 | { |
|---|
| 938 | 943 | void __iomem *rknpu_core_base = rknpu_dev->base[0]; |
|---|
| 944 | + unsigned long flags; |
|---|
| 939 | 945 | |
|---|
| 940 | 946 | if (!rknpu_dev->config->bw_enable) { |
|---|
| 941 | 947 | LOG_WARN("Clear rw_amount is not supported on this device!\n"); |
|---|
| 942 | 948 | return 0; |
|---|
| 943 | 949 | } |
|---|
| 944 | 950 | |
|---|
| 945 | | - spin_lock(&rknpu_dev->lock); |
|---|
| 946 | | - |
|---|
| 947 | 951 | if (rknpu_dev->config->pc_dma_ctrl) { |
|---|
| 948 | | - uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 952 | + uint32_t pc_data_addr = 0; |
|---|
| 953 | + |
|---|
| 954 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 955 | + pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 949 | 956 | |
|---|
| 950 | 957 | REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 951 | 958 | REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 952 | 959 | REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 953 | 960 | REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 961 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 954 | 962 | } else { |
|---|
| 963 | + spin_lock(&rknpu_dev->lock); |
|---|
| 955 | 964 | REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 956 | 965 | REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 966 | + spin_unlock(&rknpu_dev->lock); |
|---|
| 957 | 967 | } |
|---|
| 958 | | - |
|---|
| 959 | | - spin_unlock(&rknpu_dev->lock); |
|---|
| 960 | 968 | |
|---|
| 961 | 969 | return 0; |
|---|
| 962 | 970 | } |
|---|