| .. | .. |
|---|
| 23 | 23 | #define REG_READ(offset) _REG_READ(rknpu_core_base, offset) |
|---|
| 24 | 24 | #define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset) |
|---|
| 25 | 25 | |
|---|
| 26 | | -static int rknpu_core_index(int core_mask) |
|---|
| 26 | +static int rknpu_wait_core_index(int core_mask) |
|---|
| 27 | 27 | { |
|---|
| 28 | 28 | int index = 0; |
|---|
| 29 | 29 | |
|---|
| 30 | | - if (core_mask & RKNPU_CORE0_MASK) |
|---|
| 30 | + switch (core_mask) { |
|---|
| 31 | + case RKNPU_CORE0_MASK: |
|---|
| 32 | + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: |
|---|
| 33 | + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: |
|---|
| 31 | 34 | index = 0; |
|---|
| 32 | | - else if (core_mask & RKNPU_CORE1_MASK) |
|---|
| 35 | + break; |
|---|
| 36 | + case RKNPU_CORE1_MASK: |
|---|
| 33 | 37 | index = 1; |
|---|
| 34 | | - else if (core_mask & RKNPU_CORE2_MASK) |
|---|
| 38 | + break; |
|---|
| 39 | + case RKNPU_CORE2_MASK: |
|---|
| 35 | 40 | index = 2; |
|---|
| 41 | + break; |
|---|
| 42 | + default: |
|---|
| 43 | + break; |
|---|
| 44 | + } |
|---|
| 36 | 45 | |
|---|
| 37 | 46 | return index; |
|---|
| 38 | 47 | } |
|---|
| .. | .. |
|---|
| 58 | 67 | return core_mask; |
|---|
| 59 | 68 | } |
|---|
| 60 | 69 | |
|---|
| 61 | | -static int rknn_get_task_number(struct rknpu_job *job, int core_index) |
|---|
| 70 | +static int rknpu_get_task_number(struct rknpu_job *job, int core_index) |
|---|
| 62 | 71 | { |
|---|
| 72 | + struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 63 | 73 | int task_num = job->args->task_number; |
|---|
| 64 | 74 | |
|---|
| 65 | | - if (job->use_core_num == 2) |
|---|
| 66 | | - task_num = job->args->subcore_task[core_index].task_number; |
|---|
| 67 | | - else if (job->use_core_num == 3) |
|---|
| 68 | | - task_num = job->args->subcore_task[core_index + 2].task_number; |
|---|
| 75 | + if (core_index >= RKNPU_MAX_CORES || core_index < 0) { |
|---|
| 76 | + LOG_ERROR("invalid rknpu core index: %d", core_index); |
|---|
| 77 | + return 0; |
|---|
| 78 | + } |
|---|
| 79 | + |
|---|
| 80 | + if (rknpu_dev->config->num_irqs > 1) { |
|---|
| 81 | + if (job->use_core_num == 1 || job->use_core_num == 2) |
|---|
| 82 | + task_num = |
|---|
| 83 | + job->args->subcore_task[core_index].task_number; |
|---|
| 84 | + else if (job->use_core_num == 3) |
|---|
| 85 | + task_num = job->args->subcore_task[core_index + 2] |
|---|
| 86 | + .task_number; |
|---|
| 87 | + } |
|---|
| 69 | 88 | |
|---|
| 70 | 89 | return task_num; |
|---|
| 71 | 90 | } |
|---|
| .. | .. |
|---|
| 112 | 131 | #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM |
|---|
| 113 | 132 | struct rknpu_gem_object *task_obj = NULL; |
|---|
| 114 | 133 | #endif |
|---|
| 115 | | - if (rknpu_dev->config->num_irqs == 1) |
|---|
| 116 | | - args->core_mask = RKNPU_CORE0_MASK; |
|---|
| 117 | 134 | |
|---|
| 118 | 135 | job = kzalloc(sizeof(*job), GFP_KERNEL); |
|---|
| 119 | 136 | if (!job) |
|---|
| .. | .. |
|---|
| 157 | 174 | struct rknpu_submit *args = job->args; |
|---|
| 158 | 175 | struct rknpu_task *last_task = NULL; |
|---|
| 159 | 176 | struct rknpu_subcore_data *subcore_data = NULL; |
|---|
| 177 | + struct rknpu_job *entry, *q; |
|---|
| 160 | 178 | void __iomem *rknpu_core_base = NULL; |
|---|
| 161 | | - int core_index = rknpu_core_index(job->args->core_mask); |
|---|
| 179 | + int core_index = rknpu_wait_core_index(job->args->core_mask); |
|---|
| 162 | 180 | unsigned long flags; |
|---|
| 163 | 181 | int wait_count = 0; |
|---|
| 182 | + bool continue_wait = false; |
|---|
| 164 | 183 | int ret = -EINVAL; |
|---|
| 165 | 184 | int i = 0; |
|---|
| 166 | 185 | |
|---|
| .. | .. |
|---|
| 171 | 190 | job->flags & RKNPU_JOB_DONE || |
|---|
| 172 | 191 | rknpu_dev->soft_reseting, |
|---|
| 173 | 192 | msecs_to_jiffies(args->timeout)); |
|---|
| 193 | + |
|---|
| 174 | 194 | if (++wait_count >= 3) |
|---|
| 175 | 195 | break; |
|---|
| 176 | | - } while (ret == 0 && job->in_queue[core_index]); |
|---|
| 177 | 196 | |
|---|
| 178 | | - if (job->in_queue[core_index]) { |
|---|
| 179 | | - spin_lock_irqsave(&rknpu_dev->lock, flags); |
|---|
| 180 | | - subcore_data->task_num -= rknn_get_task_number(job, core_index); |
|---|
| 181 | | - if (job->use_core_num == 1) { |
|---|
| 182 | | - list_del_init(&job->head[core_index]); |
|---|
| 183 | | - job->in_queue[core_index] = false; |
|---|
| 184 | | - } else if (job->use_core_num > 1) { |
|---|
| 185 | | - for (i = 0; i < job->use_core_num; i++) { |
|---|
| 186 | | - if (job->in_queue[i]) { |
|---|
| 187 | | - list_del_init(&job->head[i]); |
|---|
| 188 | | - job->in_queue[i] = false; |
|---|
| 197 | + if (ret == 0) { |
|---|
| 198 | + int64_t elapse_time_us = 0; |
|---|
| 199 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 200 | + elapse_time_us = ktime_us_delta(ktime_get(), |
|---|
| 201 | + job->hw_commit_time); |
|---|
| 202 | + continue_wait = |
|---|
| 203 | + job->hw_commit_time == 0 ? |
|---|
| 204 | + true : |
|---|
| 205 | + (elapse_time_us < args->timeout * 1000); |
|---|
| 206 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 207 | + LOG_ERROR( |
|---|
| 208 | + "job: %p, wait_count: %d, continue wait: %d, commit elapse time: %lldus, wait time: %lldus, timeout: %uus\n", |
|---|
| 209 | + job, wait_count, continue_wait, |
|---|
| 210 | + (job->hw_commit_time == 0 ? 0 : elapse_time_us), |
|---|
| 211 | + ktime_us_delta(ktime_get(), job->timestamp), |
|---|
| 212 | + args->timeout * 1000); |
|---|
| 213 | + } |
|---|
| 214 | + } while (ret == 0 && continue_wait); |
|---|
| 215 | + |
|---|
| 216 | + last_task = job->last_task; |
|---|
| 217 | + if (!last_task) { |
|---|
| 218 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 219 | + for (i = 0; i < job->use_core_num; i++) { |
|---|
| 220 | + subcore_data = &rknpu_dev->subcore_datas[i]; |
|---|
| 221 | + list_for_each_entry_safe( |
|---|
| 222 | + entry, q, &subcore_data->todo_list, head[i]) { |
|---|
| 223 | + if (entry == job) { |
|---|
| 224 | + list_del(&job->head[i]); |
|---|
| 225 | + break; |
|---|
| 189 | 226 | } |
|---|
| 190 | 227 | } |
|---|
| 191 | 228 | } |
|---|
| 192 | | - spin_unlock_irqrestore(&rknpu_dev->lock, flags); |
|---|
| 229 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 230 | + |
|---|
| 231 | + LOG_ERROR("job commit failed\n"); |
|---|
| 193 | 232 | return ret < 0 ? ret : -EINVAL; |
|---|
| 194 | 233 | } |
|---|
| 195 | | - |
|---|
| 196 | | - last_task = job->last_task; |
|---|
| 197 | | - if (!last_task) |
|---|
| 198 | | - return ret < 0 ? ret : -EINVAL; |
|---|
| 199 | 234 | |
|---|
| 200 | 235 | last_task->int_status = job->int_status[core_index]; |
|---|
| 201 | 236 | |
|---|
| .. | .. |
|---|
| 213 | 248 | LOG_ERROR( |
|---|
| 214 | 249 | "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n", |
|---|
| 215 | 250 | args->task_counter, args->flags, ret, |
|---|
| 216 | | - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); |
|---|
| 251 | + ktime_us_delta(ktime_get(), job->timestamp)); |
|---|
| 217 | 252 | |
|---|
| 218 | 253 | return ret < 0 ? ret : -ETIMEDOUT; |
|---|
| 219 | 254 | } |
|---|
| .. | .. |
|---|
| 222 | 257 | return -EINVAL; |
|---|
| 223 | 258 | |
|---|
| 224 | 259 | args->task_counter = args->task_number; |
|---|
| 260 | + args->hw_elapse_time = job->hw_elapse_time; |
|---|
| 225 | 261 | |
|---|
| 226 | 262 | return 0; |
|---|
| 227 | 263 | } |
|---|
| 228 | 264 | |
|---|
| 229 | | -static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) |
|---|
| 265 | +static inline int rknpu_job_subcore_commit_pc(struct rknpu_job *job, |
|---|
| 266 | + int core_index) |
|---|
| 230 | 267 | { |
|---|
| 231 | 268 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 232 | 269 | struct rknpu_submit *args = job->args; |
|---|
| .. | .. |
|---|
| 243 | 280 | struct rknpu_task *last_task = NULL; |
|---|
| 244 | 281 | void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; |
|---|
| 245 | 282 | int task_start = args->task_start; |
|---|
| 246 | | - int task_end = args->task_start + args->task_number - 1; |
|---|
| 283 | + int task_end; |
|---|
| 247 | 284 | int task_number = args->task_number; |
|---|
| 248 | 285 | int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0; |
|---|
| 249 | 286 | int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale; |
|---|
| 250 | 287 | int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits; |
|---|
| 251 | 288 | int i = 0; |
|---|
| 289 | + int submit_index = atomic_read(&job->submit_count[core_index]); |
|---|
| 290 | + int max_submit_number = rknpu_dev->config->max_submit_number; |
|---|
| 291 | + unsigned long flags; |
|---|
| 252 | 292 | |
|---|
| 253 | | - if (!task_obj) |
|---|
| 254 | | - return -EINVAL; |
|---|
| 293 | + if (!task_obj) { |
|---|
| 294 | + job->ret = -EINVAL; |
|---|
| 295 | + return job->ret; |
|---|
| 296 | + } |
|---|
| 255 | 297 | |
|---|
| 256 | 298 | if (rknpu_dev->config->num_irqs > 1) { |
|---|
| 257 | 299 | for (i = 0; i < rknpu_dev->config->num_irqs; i++) { |
|---|
| .. | .. |
|---|
| 261 | 303 | } |
|---|
| 262 | 304 | } |
|---|
| 263 | 305 | |
|---|
| 264 | | - if (job->use_core_num == 1) { |
|---|
| 306 | + switch (job->use_core_num) { |
|---|
| 307 | + case 1: |
|---|
| 308 | + case 2: |
|---|
| 265 | 309 | task_start = args->subcore_task[core_index].task_start; |
|---|
| 266 | | - task_end = args->subcore_task[core_index].task_start + |
|---|
| 267 | | - args->subcore_task[core_index].task_number - |
|---|
| 268 | | - 1; |
|---|
| 269 | 310 | task_number = |
|---|
| 270 | 311 | args->subcore_task[core_index].task_number; |
|---|
| 271 | | - } else if (job->use_core_num == 2) { |
|---|
| 272 | | - task_start = args->subcore_task[core_index].task_start; |
|---|
| 273 | | - task_end = args->subcore_task[core_index].task_start + |
|---|
| 274 | | - args->subcore_task[core_index].task_number - |
|---|
| 275 | | - 1; |
|---|
| 276 | | - task_number = |
|---|
| 277 | | - args->subcore_task[core_index].task_number; |
|---|
| 278 | | - } else if (job->use_core_num == 3) { |
|---|
| 312 | + break; |
|---|
| 313 | + case 3: |
|---|
| 279 | 314 | task_start = |
|---|
| 280 | 315 | args->subcore_task[core_index + 2].task_start; |
|---|
| 281 | | - task_end = |
|---|
| 282 | | - args->subcore_task[core_index + 2].task_start + |
|---|
| 283 | | - args->subcore_task[core_index + 2].task_number - |
|---|
| 284 | | - 1; |
|---|
| 285 | 316 | task_number = |
|---|
| 286 | 317 | args->subcore_task[core_index + 2].task_number; |
|---|
| 318 | + break; |
|---|
| 319 | + default: |
|---|
| 320 | + LOG_ERROR("Unknown use core num %d\n", |
|---|
| 321 | + job->use_core_num); |
|---|
| 322 | + break; |
|---|
| 287 | 323 | } |
|---|
| 288 | 324 | } |
|---|
| 325 | + |
|---|
| 326 | + task_start = task_start + submit_index * max_submit_number; |
|---|
| 327 | + task_number = task_number - submit_index * max_submit_number; |
|---|
| 328 | + task_number = task_number > max_submit_number ? max_submit_number : |
|---|
| 329 | + task_number; |
|---|
| 330 | + task_end = task_start + task_number - 1; |
|---|
| 289 | 331 | |
|---|
| 290 | 332 | task_base = task_obj->kv_addr; |
|---|
| 291 | 333 | |
|---|
| 292 | 334 | first_task = &task_base[task_start]; |
|---|
| 293 | 335 | last_task = &task_base[task_end]; |
|---|
| 294 | 336 | |
|---|
| 295 | | - REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 337 | + if (rknpu_dev->config->pc_dma_ctrl) { |
|---|
| 338 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 339 | + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 340 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 341 | + } else { |
|---|
| 342 | + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 343 | + } |
|---|
| 296 | 344 | |
|---|
| 297 | 345 | REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + |
|---|
| 298 | 346 | pc_data_amount_scale - 1) / |
|---|
| .. | .. |
|---|
| 319 | 367 | return 0; |
|---|
| 320 | 368 | } |
|---|
| 321 | 369 | |
|---|
| 322 | | -static int rknpu_job_commit(struct rknpu_job *job, int core_index) |
|---|
| 370 | +static inline int rknpu_job_subcore_commit(struct rknpu_job *job, |
|---|
| 371 | + int core_index) |
|---|
| 323 | 372 | { |
|---|
| 324 | 373 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 325 | 374 | struct rknpu_submit *args = job->args; |
|---|
| 326 | 375 | void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; |
|---|
| 376 | + unsigned long flags; |
|---|
| 327 | 377 | |
|---|
| 328 | 378 | // switch to slave mode |
|---|
| 329 | | - REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 379 | + if (rknpu_dev->config->pc_dma_ctrl) { |
|---|
| 380 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 381 | + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 382 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 383 | + } else { |
|---|
| 384 | + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 385 | + } |
|---|
| 330 | 386 | |
|---|
| 331 | | - if (!(args->flags & RKNPU_JOB_PC)) |
|---|
| 332 | | - return -EINVAL; |
|---|
| 387 | + if (!(args->flags & RKNPU_JOB_PC)) { |
|---|
| 388 | + job->ret = -EINVAL; |
|---|
| 389 | + return job->ret; |
|---|
| 390 | + } |
|---|
| 333 | 391 | |
|---|
| 334 | | - return rknpu_job_commit_pc(job, core_index); |
|---|
| 392 | + return rknpu_job_subcore_commit_pc(job, core_index); |
|---|
| 393 | +} |
|---|
| 394 | + |
|---|
| 395 | +static void rknpu_job_commit(struct rknpu_job *job) |
|---|
| 396 | +{ |
|---|
| 397 | + switch (job->args->core_mask) { |
|---|
| 398 | + case RKNPU_CORE0_MASK: |
|---|
| 399 | + rknpu_job_subcore_commit(job, 0); |
|---|
| 400 | + break; |
|---|
| 401 | + case RKNPU_CORE1_MASK: |
|---|
| 402 | + rknpu_job_subcore_commit(job, 1); |
|---|
| 403 | + break; |
|---|
| 404 | + case RKNPU_CORE2_MASK: |
|---|
| 405 | + rknpu_job_subcore_commit(job, 2); |
|---|
| 406 | + break; |
|---|
| 407 | + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK: |
|---|
| 408 | + rknpu_job_subcore_commit(job, 0); |
|---|
| 409 | + rknpu_job_subcore_commit(job, 1); |
|---|
| 410 | + break; |
|---|
| 411 | + case RKNPU_CORE0_MASK | RKNPU_CORE1_MASK | RKNPU_CORE2_MASK: |
|---|
| 412 | + rknpu_job_subcore_commit(job, 0); |
|---|
| 413 | + rknpu_job_subcore_commit(job, 1); |
|---|
| 414 | + rknpu_job_subcore_commit(job, 2); |
|---|
| 415 | + break; |
|---|
| 416 | + default: |
|---|
| 417 | + LOG_ERROR("Unknown core mask: %d\n", job->args->core_mask); |
|---|
| 418 | + break; |
|---|
| 419 | + } |
|---|
| 335 | 420 | } |
|---|
| 336 | 421 | |
|---|
| 337 | 422 | static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) |
|---|
| .. | .. |
|---|
| 356 | 441 | head[core_index]); |
|---|
| 357 | 442 | |
|---|
| 358 | 443 | list_del_init(&job->head[core_index]); |
|---|
| 359 | | - job->in_queue[core_index] = false; |
|---|
| 360 | 444 | subcore_data->job = job; |
|---|
| 361 | | - job->hw_recoder_time = ktime_get(); |
|---|
| 445 | + job->hw_commit_time = ktime_get(); |
|---|
| 446 | + job->hw_recoder_time = job->hw_commit_time; |
|---|
| 362 | 447 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 363 | 448 | |
|---|
| 364 | 449 | if (atomic_dec_and_test(&job->run_count)) { |
|---|
| 365 | | - if (job->args->core_mask & RKNPU_CORE0_MASK) |
|---|
| 366 | | - job->ret = rknpu_job_commit(job, 0); |
|---|
| 367 | | - if (job->args->core_mask & RKNPU_CORE1_MASK) |
|---|
| 368 | | - job->ret = rknpu_job_commit(job, 1); |
|---|
| 369 | | - if (job->args->core_mask & RKNPU_CORE2_MASK) |
|---|
| 370 | | - job->ret = rknpu_job_commit(job, 2); |
|---|
| 450 | + rknpu_job_commit(job); |
|---|
| 371 | 451 | } |
|---|
| 372 | 452 | } |
|---|
| 373 | 453 | |
|---|
| .. | .. |
|---|
| 375 | 455 | { |
|---|
| 376 | 456 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 377 | 457 | struct rknpu_subcore_data *subcore_data = NULL; |
|---|
| 458 | + ktime_t now; |
|---|
| 378 | 459 | unsigned long flags; |
|---|
| 379 | | - ktime_t now = ktime_get(); |
|---|
| 460 | + int max_submit_number = rknpu_dev->config->max_submit_number; |
|---|
| 461 | + |
|---|
| 462 | + if (atomic_inc_return(&job->submit_count[core_index]) < |
|---|
| 463 | + (rknpu_get_task_number(job, core_index) + max_submit_number - 1) / |
|---|
| 464 | + max_submit_number) { |
|---|
| 465 | + rknpu_job_subcore_commit(job, core_index); |
|---|
| 466 | + return; |
|---|
| 467 | + } |
|---|
| 380 | 468 | |
|---|
| 381 | 469 | subcore_data = &rknpu_dev->subcore_datas[core_index]; |
|---|
| 382 | 470 | |
|---|
| 383 | 471 | spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 384 | 472 | subcore_data->job = NULL; |
|---|
| 385 | | - subcore_data->task_num -= rknn_get_task_number(job, core_index); |
|---|
| 386 | | - subcore_data->timer.busy_time += |
|---|
| 387 | | - ktime_us_delta(now, job->hw_recoder_time); |
|---|
| 473 | + subcore_data->task_num -= rknpu_get_task_number(job, core_index); |
|---|
| 474 | + now = ktime_get(); |
|---|
| 475 | + job->hw_elapse_time = ktime_sub(now, job->hw_commit_time); |
|---|
| 476 | + subcore_data->timer.busy_time += ktime_sub(now, job->hw_recoder_time); |
|---|
| 388 | 477 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 389 | 478 | |
|---|
| 390 | 479 | if (atomic_dec_and_test(&job->interrupt_count)) { |
|---|
| .. | .. |
|---|
| 408 | 497 | rknpu_job_next(rknpu_dev, core_index); |
|---|
| 409 | 498 | } |
|---|
| 410 | 499 | |
|---|
| 500 | +static int rknpu_schedule_core_index(struct rknpu_device *rknpu_dev) |
|---|
| 501 | +{ |
|---|
| 502 | + int core_num = rknpu_dev->config->num_irqs; |
|---|
| 503 | + int task_num = rknpu_dev->subcore_datas[0].task_num; |
|---|
| 504 | + int core_index = 0; |
|---|
| 505 | + int i = 0; |
|---|
| 506 | + |
|---|
| 507 | + for (i = 1; i < core_num; i++) { |
|---|
| 508 | + if (task_num > rknpu_dev->subcore_datas[i].task_num) { |
|---|
| 509 | + core_index = i; |
|---|
| 510 | + task_num = rknpu_dev->subcore_datas[i].task_num; |
|---|
| 511 | + } |
|---|
| 512 | + } |
|---|
| 513 | + |
|---|
| 514 | + return core_index; |
|---|
| 515 | +} |
|---|
| 516 | + |
|---|
| 411 | 517 | static void rknpu_job_schedule(struct rknpu_job *job) |
|---|
| 412 | 518 | { |
|---|
| 413 | 519 | struct rknpu_device *rknpu_dev = job->rknpu_dev; |
|---|
| 414 | 520 | struct rknpu_subcore_data *subcore_data = NULL; |
|---|
| 415 | 521 | int i = 0, core_index = 0; |
|---|
| 416 | 522 | unsigned long flags; |
|---|
| 417 | | - int task_num_list[3] = { 0, 1, 2 }; |
|---|
| 418 | | - int tmp = 0; |
|---|
| 419 | 523 | |
|---|
| 420 | | - if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) { |
|---|
| 421 | | - if (rknpu_dev->subcore_datas[0].task_num > |
|---|
| 422 | | - rknpu_dev->subcore_datas[1].task_num) { |
|---|
| 423 | | - tmp = task_num_list[1]; |
|---|
| 424 | | - task_num_list[1] = task_num_list[0]; |
|---|
| 425 | | - task_num_list[0] = tmp; |
|---|
| 426 | | - } |
|---|
| 427 | | - if (rknpu_dev->subcore_datas[task_num_list[0]].task_num > |
|---|
| 428 | | - rknpu_dev->subcore_datas[2].task_num) { |
|---|
| 429 | | - tmp = task_num_list[2]; |
|---|
| 430 | | - task_num_list[2] = task_num_list[1]; |
|---|
| 431 | | - task_num_list[1] = task_num_list[0]; |
|---|
| 432 | | - task_num_list[0] = tmp; |
|---|
| 433 | | - } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num > |
|---|
| 434 | | - rknpu_dev->subcore_datas[2].task_num) { |
|---|
| 435 | | - tmp = task_num_list[2]; |
|---|
| 436 | | - task_num_list[2] = task_num_list[1]; |
|---|
| 437 | | - task_num_list[1] = tmp; |
|---|
| 438 | | - } |
|---|
| 439 | | - if (!rknpu_dev->subcore_datas[task_num_list[0]].job) |
|---|
| 440 | | - core_index = task_num_list[0]; |
|---|
| 441 | | - else if (!rknpu_dev->subcore_datas[task_num_list[1]].job) |
|---|
| 442 | | - core_index = task_num_list[1]; |
|---|
| 443 | | - else if (!rknpu_dev->subcore_datas[task_num_list[2]].job) |
|---|
| 444 | | - core_index = task_num_list[2]; |
|---|
| 445 | | - else |
|---|
| 446 | | - core_index = task_num_list[0]; |
|---|
| 447 | | - |
|---|
| 524 | + if (job->args->core_mask == RKNPU_CORE_AUTO_MASK) { |
|---|
| 525 | + core_index = rknpu_schedule_core_index(rknpu_dev); |
|---|
| 448 | 526 | job->args->core_mask = rknpu_core_mask(core_index); |
|---|
| 449 | 527 | job->use_core_num = 1; |
|---|
| 450 | 528 | atomic_set(&job->run_count, job->use_core_num); |
|---|
| .. | .. |
|---|
| 456 | 534 | if (job->args->core_mask & rknpu_core_mask(i)) { |
|---|
| 457 | 535 | subcore_data = &rknpu_dev->subcore_datas[i]; |
|---|
| 458 | 536 | list_add_tail(&job->head[i], &subcore_data->todo_list); |
|---|
| 459 | | - subcore_data->task_num += rknn_get_task_number(job, i); |
|---|
| 460 | | - job->in_queue[i] = true; |
|---|
| 537 | + subcore_data->task_num += rknpu_get_task_number(job, i); |
|---|
| 461 | 538 | } |
|---|
| 462 | 539 | } |
|---|
| 463 | 540 | spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| .. | .. |
|---|
| 477 | 554 | |
|---|
| 478 | 555 | msleep(100); |
|---|
| 479 | 556 | |
|---|
| 557 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 480 | 558 | for (i = 0; i < rknpu_dev->config->num_irqs; i++) { |
|---|
| 481 | 559 | if (job->args->core_mask & rknpu_core_mask(i)) { |
|---|
| 482 | 560 | subcore_data = &rknpu_dev->subcore_datas[i]; |
|---|
| 483 | | - spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 484 | 561 | if (job == subcore_data->job && !job->irq_entry[i]) { |
|---|
| 485 | 562 | subcore_data->job = NULL; |
|---|
| 486 | 563 | subcore_data->task_num -= |
|---|
| 487 | | - rknn_get_task_number(job, i); |
|---|
| 564 | + rknpu_get_task_number(job, i); |
|---|
| 488 | 565 | } |
|---|
| 489 | | - spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 490 | 566 | } |
|---|
| 491 | 567 | } |
|---|
| 568 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 492 | 569 | |
|---|
| 493 | 570 | if (job->ret == -ETIMEDOUT) { |
|---|
| 494 | 571 | LOG_ERROR("job timeout, flags: %#x:\n", job->flags); |
|---|
| .. | .. |
|---|
| 505 | 582 | rknpu_dev->config |
|---|
| 506 | 583 | ->pc_task_status_offset) & |
|---|
| 507 | 584 | rknpu_dev->config->pc_task_number_mask), |
|---|
| 508 | | - ktime_to_us(ktime_sub(ktime_get(), |
|---|
| 509 | | - job->timestamp))); |
|---|
| 585 | + ktime_us_delta(ktime_get(), |
|---|
| 586 | + job->timestamp)); |
|---|
| 510 | 587 | } |
|---|
| 511 | 588 | } |
|---|
| 512 | 589 | rknpu_soft_reset(rknpu_dev); |
|---|
| .. | .. |
|---|
| 514 | 591 | LOG_ERROR( |
|---|
| 515 | 592 | "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n", |
|---|
| 516 | 593 | job->flags, job->ret, |
|---|
| 517 | | - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); |
|---|
| 594 | + ktime_us_delta(ktime_get(), job->timestamp)); |
|---|
| 518 | 595 | } |
|---|
| 519 | 596 | |
|---|
| 520 | 597 | rknpu_job_cleanup(job); |
|---|
| .. | .. |
|---|
| 609 | 686 | { |
|---|
| 610 | 687 | struct rknpu_job *job = NULL; |
|---|
| 611 | 688 | unsigned long flags; |
|---|
| 612 | | - ktime_t now = ktime_get(); |
|---|
| 613 | 689 | struct rknpu_subcore_data *subcore_data = NULL; |
|---|
| 614 | 690 | int i = 0; |
|---|
| 615 | 691 | |
|---|
| .. | .. |
|---|
| 618 | 694 | subcore_data = &rknpu_dev->subcore_datas[i]; |
|---|
| 619 | 695 | job = subcore_data->job; |
|---|
| 620 | 696 | if (job && |
|---|
| 621 | | - ktime_to_ms(ktime_sub(now, job->timestamp)) >= |
|---|
| 697 | + ktime_us_delta(ktime_get(), job->timestamp) >= |
|---|
| 622 | 698 | job->args->timeout) { |
|---|
| 623 | 699 | rknpu_soft_reset(rknpu_dev); |
|---|
| 624 | 700 | |
|---|
| .. | .. |
|---|
| 640 | 716 | struct rknpu_job, |
|---|
| 641 | 717 | head[i]); |
|---|
| 642 | 718 | list_del_init(&job->head[i]); |
|---|
| 643 | | - job->in_queue[i] = false; |
|---|
| 644 | 719 | } else { |
|---|
| 645 | 720 | job = NULL; |
|---|
| 646 | 721 | } |
|---|
| .. | .. |
|---|
| 661 | 736 | |
|---|
| 662 | 737 | if (args->task_number == 0) { |
|---|
| 663 | 738 | LOG_ERROR("invalid rknpu task number!\n"); |
|---|
| 739 | + return -EINVAL; |
|---|
| 740 | + } |
|---|
| 741 | + |
|---|
| 742 | + if (args->core_mask > rknpu_dev->config->core_mask) { |
|---|
| 743 | + LOG_ERROR("invalid rknpu core mask: %#x", args->core_mask); |
|---|
| 664 | 744 | return -EINVAL; |
|---|
| 665 | 745 | } |
|---|
| 666 | 746 | |
|---|
| .. | .. |
|---|
| 861 | 941 | int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev) |
|---|
| 862 | 942 | { |
|---|
| 863 | 943 | void __iomem *rknpu_core_base = rknpu_dev->base[0]; |
|---|
| 944 | + unsigned long flags; |
|---|
| 864 | 945 | |
|---|
| 865 | 946 | if (!rknpu_dev->config->bw_enable) { |
|---|
| 866 | 947 | LOG_WARN("Clear rw_amount is not supported on this device!\n"); |
|---|
| 867 | 948 | return 0; |
|---|
| 868 | 949 | } |
|---|
| 869 | 950 | |
|---|
| 870 | | - spin_lock(&rknpu_dev->lock); |
|---|
| 871 | | - |
|---|
| 872 | 951 | if (rknpu_dev->config->pc_dma_ctrl) { |
|---|
| 873 | | - uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 952 | + uint32_t pc_data_addr = 0; |
|---|
| 953 | + |
|---|
| 954 | + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); |
|---|
| 955 | + pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 874 | 956 | |
|---|
| 875 | 957 | REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 876 | 958 | REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 877 | 959 | REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 878 | 960 | REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR); |
|---|
| 961 | + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); |
|---|
| 879 | 962 | } else { |
|---|
| 963 | + spin_lock(&rknpu_dev->lock); |
|---|
| 880 | 964 | REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 881 | 965 | REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); |
|---|
| 966 | + spin_unlock(&rknpu_dev->lock); |
|---|
| 882 | 967 | } |
|---|
| 883 | | - |
|---|
| 884 | | - spin_unlock(&rknpu_dev->lock); |
|---|
| 885 | 968 | |
|---|
| 886 | 969 | return 0; |
|---|
| 887 | 970 | } |
|---|